//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
                 SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return
    isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
  // value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool
  isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset";
      break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

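  // Create a register operand with no modifiers; S and E record the
  // source range of the register token.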
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  ///        descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap =
      std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

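  // Accessors for MC-layer objects (target streamer, register info,
  // instruction info) and the subtarget feature bits.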
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier
                                = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool
  validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultGLC_1() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to the floating-point semantics of VT.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not).
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1719 1720 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1721 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1722 } 1723 1724 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1725 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1726 } 1727 1728 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1729 if (AsmParser->isVI()) 1730 return isVReg32(); 1731 else if (AsmParser->isGFX9Plus()) 1732 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1733 else 1734 return false; 1735 } 1736 1737 bool AMDGPUOperand::isSDWAFP16Operand() const { 1738 return isSDWAOperand(MVT::f16); 1739 } 1740 1741 bool AMDGPUOperand::isSDWAFP32Operand() const { 1742 return isSDWAOperand(MVT::f32); 1743 } 1744 1745 bool AMDGPUOperand::isSDWAInt16Operand() const { 1746 return isSDWAOperand(MVT::i16); 1747 } 1748 1749 bool AMDGPUOperand::isSDWAInt32Operand() const { 1750 return isSDWAOperand(MVT::i32); 1751 } 1752 1753 bool AMDGPUOperand::isBoolReg() const { 1754 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1755 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1756 } 1757 1758 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1759 { 1760 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1761 assert(Size == 2 || Size == 4 || Size == 8); 1762 1763 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1764 1765 if (Imm.Mods.Abs) { 1766 Val &= ~FpSignMask; 1767 } 1768 if (Imm.Mods.Neg) { 1769 Val ^= FpSignMask; 1770 } 1771 1772 return Val; 1773 } 1774 1775 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1776 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1777 Inst.getNumOperands())) { 1778 addLiteralImmOperand(Inst, Imm.Val, 1779 ApplyModifiers & 1780 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1781 } else { 1782 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1783 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1784 setImmKindNone(); 1785 } 1786 } 1787 1788 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1789 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1790 auto OpNum = Inst.getNumOperands(); 1791 // Check that this operand accepts literals 1792 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1793 1794 if (ApplyModifiers) { 1795 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1796 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1797 Val = applyInputFPModifiers(Val, Size); 1798 } 1799 1800 APInt Literal(64, Val); 1801 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1802 1803 if (Imm.IsFPImm) { // We got fp literal token 1804 switch (OpTy) { 1805 case AMDGPU::OPERAND_REG_IMM_INT64: 1806 case AMDGPU::OPERAND_REG_IMM_FP64: 1807 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1808 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1809 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1810 AsmParser->hasInv2PiInlineImm())) { 1811 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1812 setImmKindConst(); 1813 return; 1814 } 1815 1816 // Non-inlineable 1817 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1818 // For fp operands we check if low 32 bits are zeros 1819 if (Literal.getLoBits(32) != 0) { 1820 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1821 "Can't encode literal as exact 64-bit floating-point operand. " 1822 "Low 32-bits will be set to zero"); 1823 } 1824 1825 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1826 setImmKindLiteral(); 1827 return; 1828 } 1829 1830 // We don't allow fp literals in 64-bit integer instructions. It is 1831 // unclear how we should encode them. This case should be checked earlier 1832 // in predicate methods (isLiteralImm()) 1833 llvm_unreachable("fp literal in 64-bit integer instruction."); 1834 1835 case AMDGPU::OPERAND_REG_IMM_INT32: 1836 case AMDGPU::OPERAND_REG_IMM_FP32: 1837 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1838 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1839 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1840 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1841 case AMDGPU::OPERAND_REG_IMM_INT16: 1842 case AMDGPU::OPERAND_REG_IMM_FP16: 1843 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1844 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1847 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1851 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1852 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1853 bool lost; 1854 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1855 // Convert literal to single precision 1856 FPLiteral.convert(*getOpFltSemantics(OpTy), 1857 APFloat::rmNearestTiesToEven, &lost); 1858 // We allow precision lost but not overflow or underflow. This should be 1859 // checked earlier in isLiteralImm() 1860 1861 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1862 Inst.addOperand(MCOperand::createImm(ImmVal)); 1863 setImmKindLiteral(); 1864 return; 1865 } 1866 default: 1867 llvm_unreachable("invalid operand size"); 1868 } 1869 1870 return; 1871 } 1872 1873 // We got int literal token. 1874 // Only sign extend inline immediates. 
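  // For example, with a 32-bit operand type a value such as -1 or 64 is an
  // inline constant and is emitted unchanged, while a value such as
  // 0x12345678 is not inlinable and is emitted as a 32-bit literal
  // (Val & 0xffffffff).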
1875 switch (OpTy) { 1876 case AMDGPU::OPERAND_REG_IMM_INT32: 1877 case AMDGPU::OPERAND_REG_IMM_FP32: 1878 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1879 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1880 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1882 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1883 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1884 if (isSafeTruncation(Val, 32) && 1885 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1886 AsmParser->hasInv2PiInlineImm())) { 1887 Inst.addOperand(MCOperand::createImm(Val)); 1888 setImmKindConst(); 1889 return; 1890 } 1891 1892 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1893 setImmKindLiteral(); 1894 return; 1895 1896 case AMDGPU::OPERAND_REG_IMM_INT64: 1897 case AMDGPU::OPERAND_REG_IMM_FP64: 1898 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1899 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1900 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1901 Inst.addOperand(MCOperand::createImm(Val)); 1902 setImmKindConst(); 1903 return; 1904 } 1905 1906 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1907 setImmKindLiteral(); 1908 return; 1909 1910 case AMDGPU::OPERAND_REG_IMM_INT16: 1911 case AMDGPU::OPERAND_REG_IMM_FP16: 1912 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1913 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1914 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1915 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1916 if (isSafeTruncation(Val, 16) && 1917 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1918 AsmParser->hasInv2PiInlineImm())) { 1919 Inst.addOperand(MCOperand::createImm(Val)); 1920 setImmKindConst(); 1921 return; 1922 } 1923 1924 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1925 setImmKindLiteral(); 1926 return; 1927 1928 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1929 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1930 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1931 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1932 assert(isSafeTruncation(Val, 16)); 1933 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1934 AsmParser->hasInv2PiInlineImm())); 1935 1936 Inst.addOperand(MCOperand::createImm(Val)); 1937 return; 1938 } 1939 default: 1940 llvm_unreachable("invalid operand size"); 1941 } 1942 } 1943 1944 template <unsigned Bitwidth> 1945 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1946 APInt Literal(64, Imm.Val); 1947 setImmKindNone(); 1948 1949 if (!Imm.IsFPImm) { 1950 // We got int literal token. 
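    // The low Bitwidth bits of the integer are emitted as-is; no
    // floating-point conversion is performed for an integer token.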
1951 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1952 return; 1953 } 1954 1955 bool Lost; 1956 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1957 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1958 APFloat::rmNearestTiesToEven, &Lost); 1959 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1960 } 1961 1962 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1963 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1964 } 1965 1966 static bool isInlineValue(unsigned Reg) { 1967 switch (Reg) { 1968 case AMDGPU::SRC_SHARED_BASE: 1969 case AMDGPU::SRC_SHARED_LIMIT: 1970 case AMDGPU::SRC_PRIVATE_BASE: 1971 case AMDGPU::SRC_PRIVATE_LIMIT: 1972 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1973 return true; 1974 case AMDGPU::SRC_VCCZ: 1975 case AMDGPU::SRC_EXECZ: 1976 case AMDGPU::SRC_SCC: 1977 return true; 1978 case AMDGPU::SGPR_NULL: 1979 return true; 1980 default: 1981 return false; 1982 } 1983 } 1984 1985 bool AMDGPUOperand::isInlineValue() const { 1986 return isRegKind() && ::isInlineValue(getReg()); 1987 } 1988 1989 //===----------------------------------------------------------------------===// 1990 // AsmParser 1991 //===----------------------------------------------------------------------===// 1992 1993 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1994 if (Is == IS_VGPR) { 1995 switch (RegWidth) { 1996 default: return -1; 1997 case 1: return AMDGPU::VGPR_32RegClassID; 1998 case 2: return AMDGPU::VReg_64RegClassID; 1999 case 3: return AMDGPU::VReg_96RegClassID; 2000 case 4: return AMDGPU::VReg_128RegClassID; 2001 case 5: return AMDGPU::VReg_160RegClassID; 2002 case 6: return AMDGPU::VReg_192RegClassID; 2003 case 8: return AMDGPU::VReg_256RegClassID; 2004 case 16: return AMDGPU::VReg_512RegClassID; 2005 case 32: return AMDGPU::VReg_1024RegClassID; 2006 } 2007 } else if (Is == IS_TTMP) { 2008 switch (RegWidth) { 2009 default: return -1; 2010 case 1: return AMDGPU::TTMP_32RegClassID; 2011 case 2: return AMDGPU::TTMP_64RegClassID; 2012 case 4: return AMDGPU::TTMP_128RegClassID; 2013 case 8: return AMDGPU::TTMP_256RegClassID; 2014 case 16: return AMDGPU::TTMP_512RegClassID; 2015 } 2016 } else if (Is == IS_SGPR) { 2017 switch (RegWidth) { 2018 default: return -1; 2019 case 1: return AMDGPU::SGPR_32RegClassID; 2020 case 2: return AMDGPU::SGPR_64RegClassID; 2021 case 3: return AMDGPU::SGPR_96RegClassID; 2022 case 4: return AMDGPU::SGPR_128RegClassID; 2023 case 5: return AMDGPU::SGPR_160RegClassID; 2024 case 6: return AMDGPU::SGPR_192RegClassID; 2025 case 8: return AMDGPU::SGPR_256RegClassID; 2026 case 16: return AMDGPU::SGPR_512RegClassID; 2027 } 2028 } else if (Is == IS_AGPR) { 2029 switch (RegWidth) { 2030 default: return -1; 2031 case 1: return AMDGPU::AGPR_32RegClassID; 2032 case 2: return AMDGPU::AReg_64RegClassID; 2033 case 3: return AMDGPU::AReg_96RegClassID; 2034 case 4: return AMDGPU::AReg_128RegClassID; 2035 case 5: return AMDGPU::AReg_160RegClassID; 2036 case 6: return AMDGPU::AReg_192RegClassID; 2037 case 8: return AMDGPU::AReg_256RegClassID; 2038 case 16: return AMDGPU::AReg_512RegClassID; 2039 case 32: return AMDGPU::AReg_1024RegClassID; 2040 } 2041 } 2042 return -1; 2043 } 2044 2045 static unsigned getSpecialRegForName(StringRef RegName) { 2046 return StringSwitch<unsigned>(RegName) 2047 .Case("exec", AMDGPU::EXEC) 2048 .Case("vcc", AMDGPU::VCC) 2049 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2050 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2051 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2052 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2053 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2054 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2055 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2056 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2057 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2058 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2059 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2060 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2061 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2062 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2063 .Case("m0", AMDGPU::M0) 2064 .Case("vccz", AMDGPU::SRC_VCCZ) 2065 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2066 .Case("execz", AMDGPU::SRC_EXECZ) 2067 .Case("src_execz", AMDGPU::SRC_EXECZ) 2068 .Case("scc", AMDGPU::SRC_SCC) 2069 .Case("src_scc", AMDGPU::SRC_SCC) 2070 .Case("tba", AMDGPU::TBA) 2071 .Case("tma", AMDGPU::TMA) 2072 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2073 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2074 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2075 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2076 .Case("vcc_lo", AMDGPU::VCC_LO) 2077 .Case("vcc_hi", AMDGPU::VCC_HI) 2078 .Case("exec_lo", AMDGPU::EXEC_LO) 2079 .Case("exec_hi", AMDGPU::EXEC_HI) 2080 .Case("tma_lo", AMDGPU::TMA_LO) 2081 .Case("tma_hi", AMDGPU::TMA_HI) 2082 .Case("tba_lo", AMDGPU::TBA_LO) 2083 .Case("tba_hi", AMDGPU::TBA_HI) 2084 .Case("pc", AMDGPU::PC_REG) 2085 .Case("null", AMDGPU::SGPR_NULL) 2086 .Default(AMDGPU::NoRegister); 2087 } 2088 2089 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2090 SMLoc &EndLoc, bool RestoreOnFailure) { 2091 auto R = parseRegister(); 2092 if (!R) return true; 2093 assert(R->isReg()); 2094 RegNo = R->getReg(); 2095 StartLoc = R->getStartLoc(); 2096 EndLoc = R->getEndLoc(); 2097 return false; 2098 } 2099 2100 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2101 SMLoc &EndLoc) { 2102 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2103 } 2104 2105 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2106 SMLoc &StartLoc, 2107 SMLoc &EndLoc) { 2108 bool Result = 2109 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2110 bool PendingErrors = getParser().hasPendingError(); 2111 getParser().clearPendingErrors(); 2112 if (PendingErrors) 2113 return MatchOperand_ParseFail; 2114 if (Result) 2115 return MatchOperand_NoMatch; 2116 return MatchOperand_Success; 2117 } 2118 2119 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2120 RegisterKind RegKind, unsigned Reg1, 2121 SMLoc Loc) { 2122 switch (RegKind) { 2123 case IS_SPECIAL: 2124 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2125 Reg = AMDGPU::EXEC; 2126 RegWidth = 2; 2127 return true; 2128 } 2129 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2130 Reg = AMDGPU::FLAT_SCR; 2131 RegWidth = 2; 2132 return true; 2133 } 2134 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2135 Reg = AMDGPU::XNACK_MASK; 2136 RegWidth = 2; 2137 return true; 2138 } 2139 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2140 Reg = AMDGPU::VCC; 2141 RegWidth = 2; 2142 return true; 2143 } 2144 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2145 Reg = AMDGPU::TBA; 2146 RegWidth = 2; 2147 return true; 2148 } 2149 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2150 Reg = AMDGPU::TMA; 2151 
RegWidth = 2; 2152 return true; 2153 } 2154 Error(Loc, "register does not fit in the list"); 2155 return false; 2156 case IS_VGPR: 2157 case IS_SGPR: 2158 case IS_AGPR: 2159 case IS_TTMP: 2160 if (Reg1 != Reg + RegWidth) { 2161 Error(Loc, "registers in a list must have consecutive indices"); 2162 return false; 2163 } 2164 RegWidth++; 2165 return true; 2166 default: 2167 llvm_unreachable("unexpected register kind"); 2168 } 2169 } 2170 2171 struct RegInfo { 2172 StringLiteral Name; 2173 RegisterKind Kind; 2174 }; 2175 2176 static constexpr RegInfo RegularRegisters[] = { 2177 {{"v"}, IS_VGPR}, 2178 {{"s"}, IS_SGPR}, 2179 {{"ttmp"}, IS_TTMP}, 2180 {{"acc"}, IS_AGPR}, 2181 {{"a"}, IS_AGPR}, 2182 }; 2183 2184 static bool isRegularReg(RegisterKind Kind) { 2185 return Kind == IS_VGPR || 2186 Kind == IS_SGPR || 2187 Kind == IS_TTMP || 2188 Kind == IS_AGPR; 2189 } 2190 2191 static const RegInfo* getRegularRegInfo(StringRef Str) { 2192 for (const RegInfo &Reg : RegularRegisters) 2193 if (Str.startswith(Reg.Name)) 2194 return &Reg; 2195 return nullptr; 2196 } 2197 2198 static bool getRegNum(StringRef Str, unsigned& Num) { 2199 return !Str.getAsInteger(10, Num); 2200 } 2201 2202 bool 2203 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2204 const AsmToken &NextToken) const { 2205 2206 // A list of consecutive registers: [s0,s1,s2,s3] 2207 if (Token.is(AsmToken::LBrac)) 2208 return true; 2209 2210 if (!Token.is(AsmToken::Identifier)) 2211 return false; 2212 2213 // A single register like s0 or a range of registers like s[0:1] 2214 2215 StringRef Str = Token.getString(); 2216 const RegInfo *Reg = getRegularRegInfo(Str); 2217 if (Reg) { 2218 StringRef RegName = Reg->Name; 2219 StringRef RegSuffix = Str.substr(RegName.size()); 2220 if (!RegSuffix.empty()) { 2221 unsigned Num; 2222 // A single register with an index: rXX 2223 if (getRegNum(RegSuffix, Num)) 2224 return true; 2225 } else { 2226 // A range of registers: r[XX:YY]. 2227 if (NextToken.is(AsmToken::LBrac)) 2228 return true; 2229 } 2230 } 2231 2232 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2233 } 2234 2235 bool 2236 AMDGPUAsmParser::isRegister() 2237 { 2238 return isRegister(getToken(), peekToken()); 2239 } 2240 2241 unsigned 2242 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2243 unsigned RegNum, 2244 unsigned RegWidth, 2245 SMLoc Loc) { 2246 2247 assert(isRegularReg(RegKind)); 2248 2249 unsigned AlignSize = 1; 2250 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2251 // SGPR and TTMP registers must be aligned. 2252 // Max required alignment is 4 dwords. 
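    // For example, a 2-dword range such as s[2:3] must start at an even
    // index, and a range of 4 or more dwords such as s[4:7] must start at
    // a multiple of 4.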
2253 AlignSize = std::min(RegWidth, 4u); 2254 } 2255 2256 if (RegNum % AlignSize != 0) { 2257 Error(Loc, "invalid register alignment"); 2258 return AMDGPU::NoRegister; 2259 } 2260 2261 unsigned RegIdx = RegNum / AlignSize; 2262 int RCID = getRegClass(RegKind, RegWidth); 2263 if (RCID == -1) { 2264 Error(Loc, "invalid or unsupported register size"); 2265 return AMDGPU::NoRegister; 2266 } 2267 2268 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2269 const MCRegisterClass RC = TRI->getRegClass(RCID); 2270 if (RegIdx >= RC.getNumRegs()) { 2271 Error(Loc, "register index is out of range"); 2272 return AMDGPU::NoRegister; 2273 } 2274 2275 return RC.getRegister(RegIdx); 2276 } 2277 2278 bool 2279 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2280 int64_t RegLo, RegHi; 2281 if (!skipToken(AsmToken::LBrac, "missing register index")) 2282 return false; 2283 2284 SMLoc FirstIdxLoc = getLoc(); 2285 SMLoc SecondIdxLoc; 2286 2287 if (!parseExpr(RegLo)) 2288 return false; 2289 2290 if (trySkipToken(AsmToken::Colon)) { 2291 SecondIdxLoc = getLoc(); 2292 if (!parseExpr(RegHi)) 2293 return false; 2294 } else { 2295 RegHi = RegLo; 2296 } 2297 2298 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2299 return false; 2300 2301 if (!isUInt<32>(RegLo)) { 2302 Error(FirstIdxLoc, "invalid register index"); 2303 return false; 2304 } 2305 2306 if (!isUInt<32>(RegHi)) { 2307 Error(SecondIdxLoc, "invalid register index"); 2308 return false; 2309 } 2310 2311 if (RegLo > RegHi) { 2312 Error(FirstIdxLoc, "first register index should not exceed second index"); 2313 return false; 2314 } 2315 2316 Num = static_cast<unsigned>(RegLo); 2317 Width = (RegHi - RegLo) + 1; 2318 return true; 2319 } 2320 2321 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2322 unsigned &RegNum, unsigned &RegWidth, 2323 SmallVectorImpl<AsmToken> &Tokens) { 2324 assert(isToken(AsmToken::Identifier)); 2325 unsigned Reg = getSpecialRegForName(getTokenStr()); 2326 if (Reg) { 2327 RegNum = 0; 2328 RegWidth = 1; 2329 RegKind = IS_SPECIAL; 2330 Tokens.push_back(getToken()); 2331 lex(); // skip register name 2332 } 2333 return Reg; 2334 } 2335 2336 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2337 unsigned &RegNum, unsigned &RegWidth, 2338 SmallVectorImpl<AsmToken> &Tokens) { 2339 assert(isToken(AsmToken::Identifier)); 2340 StringRef RegName = getTokenStr(); 2341 auto Loc = getLoc(); 2342 2343 const RegInfo *RI = getRegularRegInfo(RegName); 2344 if (!RI) { 2345 Error(Loc, "invalid register name"); 2346 return AMDGPU::NoRegister; 2347 } 2348 2349 Tokens.push_back(getToken()); 2350 lex(); // skip register name 2351 2352 RegKind = RI->Kind; 2353 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2354 if (!RegSuffix.empty()) { 2355 // Single 32-bit register: vXX. 2356 if (!getRegNum(RegSuffix, RegNum)) { 2357 Error(Loc, "invalid register index"); 2358 return AMDGPU::NoRegister; 2359 } 2360 RegWidth = 1; 2361 } else { 2362 // Range of registers: v[XX:YY]. ":YY" is optional. 
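    // For example, v[4:7] yields RegNum = 4 and RegWidth = 4, while v[4]
    // yields RegNum = 4 and RegWidth = 1.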
2363 if (!ParseRegRange(RegNum, RegWidth)) 2364 return AMDGPU::NoRegister; 2365 } 2366 2367 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2368 } 2369 2370 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2371 unsigned &RegWidth, 2372 SmallVectorImpl<AsmToken> &Tokens) { 2373 unsigned Reg = AMDGPU::NoRegister; 2374 auto ListLoc = getLoc(); 2375 2376 if (!skipToken(AsmToken::LBrac, 2377 "expected a register or a list of registers")) { 2378 return AMDGPU::NoRegister; 2379 } 2380 2381 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2382 2383 auto Loc = getLoc(); 2384 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2385 return AMDGPU::NoRegister; 2386 if (RegWidth != 1) { 2387 Error(Loc, "expected a single 32-bit register"); 2388 return AMDGPU::NoRegister; 2389 } 2390 2391 for (; trySkipToken(AsmToken::Comma); ) { 2392 RegisterKind NextRegKind; 2393 unsigned NextReg, NextRegNum, NextRegWidth; 2394 Loc = getLoc(); 2395 2396 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2397 NextRegNum, NextRegWidth, 2398 Tokens)) { 2399 return AMDGPU::NoRegister; 2400 } 2401 if (NextRegWidth != 1) { 2402 Error(Loc, "expected a single 32-bit register"); 2403 return AMDGPU::NoRegister; 2404 } 2405 if (NextRegKind != RegKind) { 2406 Error(Loc, "registers in a list must be of the same kind"); 2407 return AMDGPU::NoRegister; 2408 } 2409 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2410 return AMDGPU::NoRegister; 2411 } 2412 2413 if (!skipToken(AsmToken::RBrac, 2414 "expected a comma or a closing square bracket")) { 2415 return AMDGPU::NoRegister; 2416 } 2417 2418 if (isRegularReg(RegKind)) 2419 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2420 2421 return Reg; 2422 } 2423 2424 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2425 unsigned &RegNum, unsigned &RegWidth, 2426 SmallVectorImpl<AsmToken> &Tokens) { 2427 auto Loc = getLoc(); 2428 Reg = AMDGPU::NoRegister; 2429 2430 if (isToken(AsmToken::Identifier)) { 2431 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2432 if (Reg == AMDGPU::NoRegister) 2433 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2434 } else { 2435 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2436 } 2437 2438 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2439 if (Reg == AMDGPU::NoRegister) { 2440 assert(Parser.hasPendingError()); 2441 return false; 2442 } 2443 2444 if (!subtargetHasRegister(*TRI, Reg)) { 2445 if (Reg == AMDGPU::SGPR_NULL) { 2446 Error(Loc, "'null' operand is not supported on this GPU"); 2447 } else { 2448 Error(Loc, "register not available on this GPU"); 2449 } 2450 return false; 2451 } 2452 2453 return true; 2454 } 2455 2456 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2457 unsigned &RegNum, unsigned &RegWidth, 2458 bool RestoreOnFailure /*=false*/) { 2459 Reg = AMDGPU::NoRegister; 2460 2461 SmallVector<AsmToken, 1> Tokens; 2462 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2463 if (RestoreOnFailure) { 2464 while (!Tokens.empty()) { 2465 getLexer().UnLex(Tokens.pop_back_val()); 2466 } 2467 } 2468 return true; 2469 } 2470 return false; 2471 } 2472 2473 Optional<StringRef> 2474 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2475 switch (RegKind) { 2476 case IS_VGPR: 2477 return StringRef(".amdgcn.next_free_vgpr"); 2478 case IS_SGPR: 2479 return StringRef(".amdgcn.next_free_sgpr"); 2480 default: 2481 return None; 2482 } 2483 } 2484 2485 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2486 auto SymbolName = getGprCountSymbolName(RegKind); 2487 assert(SymbolName && "initializing invalid register kind"); 2488 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2489 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2490 } 2491 2492 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2493 unsigned DwordRegIndex, 2494 unsigned RegWidth) { 2495 // Symbols are only defined for GCN targets 2496 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2497 return true; 2498 2499 auto SymbolName = getGprCountSymbolName(RegKind); 2500 if (!SymbolName) 2501 return true; 2502 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2503 2504 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2505 int64_t OldCount; 2506 2507 if (!Sym->isVariable()) 2508 return !Error(getLoc(), 2509 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2510 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2511 return !Error( 2512 getLoc(), 2513 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2514 2515 if (OldCount <= NewMax) 2516 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2517 2518 return true; 2519 } 2520 2521 std::unique_ptr<AMDGPUOperand> 2522 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2523 const auto &Tok = getToken(); 2524 SMLoc StartLoc = Tok.getLoc(); 2525 SMLoc EndLoc = Tok.getEndLoc(); 2526 RegisterKind RegKind; 2527 unsigned Reg, RegNum, RegWidth; 2528 2529 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2530 return nullptr; 2531 } 2532 if (isHsaAbiVersion3(&getSTI())) { 2533 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2534 return nullptr; 2535 } else 2536 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2537 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2538 } 2539 2540 OperandMatchResultTy 2541 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2542 // TODO: add syntactic sugar for 1/(2*PI) 2543 2544 assert(!isRegister()); 2545 assert(!isModifier()); 2546 2547 const auto& Tok = getToken(); 2548 const auto& NextTok = peekToken(); 2549 bool IsReal = Tok.is(AsmToken::Real); 2550 SMLoc S = getLoc(); 2551 bool Negate = false; 2552 2553 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2554 lex(); 2555 IsReal = true; 2556 Negate = true; 2557 } 2558 2559 if (IsReal) { 2560 // Floating-point expressions are not supported. 2561 // Can only allow floating-point literals with an 2562 // optional sign. 2563 2564 StringRef Num = getTokenStr(); 2565 lex(); 2566 2567 APFloat RealVal(APFloat::IEEEdouble()); 2568 auto roundMode = APFloat::rmNearestTiesToEven; 2569 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2570 return MatchOperand_ParseFail; 2571 } 2572 if (Negate) 2573 RealVal.changeSign(); 2574 2575 Operands.push_back( 2576 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2577 AMDGPUOperand::ImmTyNone, true)); 2578 2579 return MatchOperand_Success; 2580 2581 } else { 2582 int64_t IntVal; 2583 const MCExpr *Expr; 2584 SMLoc S = getLoc(); 2585 2586 if (HasSP3AbsModifier) { 2587 // This is a workaround for handling expressions 2588 // as arguments of SP3 'abs' modifier, for example: 2589 // |1.0| 2590 // |-1| 2591 // |1+x| 2592 // This syntax is not compatible with syntax of standard 2593 // MC expressions (due to the trailing '|'). 
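      // Note: parsePrimaryExpr() does not consume binary operators, so it
      // stops before the trailing '|'; parseExpression() would instead try
      // to parse '|' as a bitwise OR.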
2594 SMLoc EndLoc; 2595 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2596 return MatchOperand_ParseFail; 2597 } else { 2598 if (Parser.parseExpression(Expr)) 2599 return MatchOperand_ParseFail; 2600 } 2601 2602 if (Expr->evaluateAsAbsolute(IntVal)) { 2603 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2604 } else { 2605 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2606 } 2607 2608 return MatchOperand_Success; 2609 } 2610 2611 return MatchOperand_NoMatch; 2612 } 2613 2614 OperandMatchResultTy 2615 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2616 if (!isRegister()) 2617 return MatchOperand_NoMatch; 2618 2619 if (auto R = parseRegister()) { 2620 assert(R->isReg()); 2621 Operands.push_back(std::move(R)); 2622 return MatchOperand_Success; 2623 } 2624 return MatchOperand_ParseFail; 2625 } 2626 2627 OperandMatchResultTy 2628 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2629 auto res = parseReg(Operands); 2630 if (res != MatchOperand_NoMatch) { 2631 return res; 2632 } else if (isModifier()) { 2633 return MatchOperand_NoMatch; 2634 } else { 2635 return parseImm(Operands, HasSP3AbsMod); 2636 } 2637 } 2638 2639 bool 2640 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2641 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2642 const auto &str = Token.getString(); 2643 return str == "abs" || str == "neg" || str == "sext"; 2644 } 2645 return false; 2646 } 2647 2648 bool 2649 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2650 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2651 } 2652 2653 bool 2654 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2655 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2656 } 2657 2658 bool 2659 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2660 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2661 } 2662 2663 // Check if this is an operand modifier or an opcode modifier 2664 // which may look like an expression but it is not. We should 2665 // avoid parsing these modifiers as expressions. Currently 2666 // recognized sequences are: 2667 // |...| 2668 // abs(...) 2669 // neg(...) 2670 // sext(...) 2671 // -reg 2672 // -|...| 2673 // -abs(...) 2674 // name:... 2675 // Note that simple opcode modifiers like 'gds' may be parsed as 2676 // expressions; this is a special case. See getExpressionAsToken. 2677 // 2678 bool 2679 AMDGPUAsmParser::isModifier() { 2680 2681 AsmToken Tok = getToken(); 2682 AsmToken NextToken[2]; 2683 peekTokens(NextToken); 2684 2685 return isOperandModifier(Tok, NextToken[0]) || 2686 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2687 isOpcodeModifierWithVal(Tok, NextToken[0]); 2688 } 2689 2690 // Check if the current token is an SP3 'neg' modifier. 2691 // Currently this modifier is allowed in the following context: 2692 // 2693 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2694 // 2. Before an 'abs' modifier: -abs(...) 2695 // 3. Before an SP3 'abs' modifier: -|...| 2696 // 2697 // In all other cases "-" is handled as a part 2698 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to the literal.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2772 } 2773 2774 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2775 return MatchOperand_ParseFail; 2776 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2777 return MatchOperand_ParseFail; 2778 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2779 return MatchOperand_ParseFail; 2780 2781 AMDGPUOperand::Modifiers Mods; 2782 Mods.Abs = Abs || SP3Abs; 2783 Mods.Neg = Neg || SP3Neg; 2784 2785 if (Mods.hasFPModifiers()) { 2786 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2787 if (Op.isExpr()) { 2788 Error(Op.getStartLoc(), "expected an absolute expression"); 2789 return MatchOperand_ParseFail; 2790 } 2791 Op.setModifiers(Mods); 2792 } 2793 return MatchOperand_Success; 2794 } 2795 2796 OperandMatchResultTy 2797 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2798 bool AllowImm) { 2799 bool Sext = trySkipId("sext"); 2800 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2801 return MatchOperand_ParseFail; 2802 2803 OperandMatchResultTy Res; 2804 if (AllowImm) { 2805 Res = parseRegOrImm(Operands); 2806 } else { 2807 Res = parseReg(Operands); 2808 } 2809 if (Res != MatchOperand_Success) { 2810 return Sext? MatchOperand_ParseFail : Res; 2811 } 2812 2813 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2814 return MatchOperand_ParseFail; 2815 2816 AMDGPUOperand::Modifiers Mods; 2817 Mods.Sext = Sext; 2818 2819 if (Mods.hasIntModifiers()) { 2820 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2821 if (Op.isExpr()) { 2822 Error(Op.getStartLoc(), "expected an absolute expression"); 2823 return MatchOperand_ParseFail; 2824 } 2825 Op.setModifiers(Mods); 2826 } 2827 2828 return MatchOperand_Success; 2829 } 2830 2831 OperandMatchResultTy 2832 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2833 return parseRegOrImmWithFPInputMods(Operands, false); 2834 } 2835 2836 OperandMatchResultTy 2837 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2838 return parseRegOrImmWithIntInputMods(Operands, false); 2839 } 2840 2841 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2842 auto Loc = getLoc(); 2843 if (trySkipId("off")) { 2844 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2845 AMDGPUOperand::ImmTyOff, false)); 2846 return MatchOperand_Success; 2847 } 2848 2849 if (!isRegister()) 2850 return MatchOperand_NoMatch; 2851 2852 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2853 if (Reg) { 2854 Operands.push_back(std::move(Reg)); 2855 return MatchOperand_Success; 2856 } 2857 2858 return MatchOperand_ParseFail; 2859 2860 } 2861 2862 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2863 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2864 2865 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2866 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2867 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2868 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2869 return Match_InvalidOperand; 2870 2871 if ((TSFlags & SIInstrFlags::VOP3) && 2872 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2873 getForcedEncodingSize() != 64) 2874 return Match_PreferE32; 2875 2876 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2877 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2878 // v_mac_f32/16 allow only dst_sel == DWORD; 2879 auto OpNum = 2880 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2881 const auto &Op = Inst.getOperand(OpNum); 2882 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2883 return Match_InvalidOperand; 2884 } 2885 } 2886 2887 return Match_Success; 2888 } 2889 2890 static ArrayRef<unsigned> getAllVariants() { 2891 static const unsigned Variants[] = { 2892 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2893 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2894 }; 2895 2896 return makeArrayRef(Variants); 2897 } 2898 2899 // What asm variants we should check 2900 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2901 if (getForcedEncodingSize() == 32) { 2902 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2903 return makeArrayRef(Variants); 2904 } 2905 2906 if (isForcedVOP3()) { 2907 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2908 return makeArrayRef(Variants); 2909 } 2910 2911 if (isForcedSDWA()) { 2912 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2913 AMDGPUAsmVariants::SDWA9}; 2914 return makeArrayRef(Variants); 2915 } 2916 2917 if (isForcedDPP()) { 2918 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2919 return makeArrayRef(Variants); 2920 } 2921 2922 return getAllVariants(); 2923 } 2924 2925 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2926 if (getForcedEncodingSize() == 32) 2927 return "e32"; 2928 2929 if (isForcedVOP3()) 2930 return "e64"; 2931 2932 if (isForcedSDWA()) 2933 return "sdwa"; 2934 2935 if (isForcedDPP()) 2936 return "dpp"; 2937 2938 return ""; 2939 } 2940 2941 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2942 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2943 const unsigned Num = Desc.getNumImplicitUses(); 2944 for (unsigned i = 0; i < Num; ++i) { 2945 unsigned Reg = Desc.ImplicitUses[i]; 2946 switch (Reg) { 2947 case AMDGPU::FLAT_SCR: 2948 case AMDGPU::VCC: 2949 case AMDGPU::VCC_LO: 2950 case AMDGPU::VCC_HI: 2951 case AMDGPU::M0: 2952 return Reg; 2953 default: 2954 break; 2955 } 2956 } 2957 return AMDGPU::NoRegister; 2958 } 2959 2960 // NB: This code is correct only when used to check constant 2961 // bus limitations because GFX7 support no f16 inline constants. 2962 // Note that there are no cases when a GFX7 opcode violates 2963 // constant bus limitations due to the use of an f16 constant. 
2964 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2965 unsigned OpIdx) const { 2966 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2967 2968 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2969 return false; 2970 } 2971 2972 const MCOperand &MO = Inst.getOperand(OpIdx); 2973 2974 int64_t Val = MO.getImm(); 2975 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2976 2977 switch (OpSize) { // expected operand size 2978 case 8: 2979 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2980 case 4: 2981 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2982 case 2: { 2983 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2984 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2985 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2986 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2987 return AMDGPU::isInlinableIntLiteral(Val); 2988 2989 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2990 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2991 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2992 return AMDGPU::isInlinableIntLiteralV216(Val); 2993 2994 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2995 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2996 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2997 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2998 2999 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3000 } 3001 default: 3002 llvm_unreachable("invalid operand size"); 3003 } 3004 } 3005 3006 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3007 if (!isGFX10Plus()) 3008 return 1; 3009 3010 switch (Opcode) { 3011 // 64-bit shift instructions can use only one scalar value input 3012 case AMDGPU::V_LSHLREV_B64_e64: 3013 case AMDGPU::V_LSHLREV_B64_gfx10: 3014 case AMDGPU::V_LSHRREV_B64_e64: 3015 case AMDGPU::V_LSHRREV_B64_gfx10: 3016 case AMDGPU::V_ASHRREV_I64_e64: 3017 case AMDGPU::V_ASHRREV_I64_gfx10: 3018 case AMDGPU::V_LSHL_B64_e64: 3019 case AMDGPU::V_LSHR_B64_e64: 3020 case AMDGPU::V_ASHR_I64_e64: 3021 return 1; 3022 default: 3023 return 2; 3024 } 3025 } 3026 3027 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3028 const MCOperand &MO = Inst.getOperand(OpIdx); 3029 if (MO.isImm()) { 3030 return !isInlineConstant(Inst, OpIdx); 3031 } else if (MO.isReg()) { 3032 auto Reg = MO.getReg(); 3033 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3034 auto PReg = mc2PseudoReg(Reg); 3035 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3036 } else { 3037 return true; 3038 } 3039 } 3040 3041 bool 3042 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3043 const OperandVector &Operands) { 3044 const unsigned Opcode = Inst.getOpcode(); 3045 const MCInstrDesc &Desc = MII.get(Opcode); 3046 unsigned LastSGPR = AMDGPU::NoRegister; 3047 unsigned ConstantBusUseCount = 0; 3048 unsigned NumLiterals = 0; 3049 unsigned LiteralSize; 3050 3051 if (Desc.TSFlags & 3052 (SIInstrFlags::VOPC | 3053 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3054 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3055 SIInstrFlags::SDWA)) { 3056 // Check special imm operands (used by madmk, etc) 3057 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3058 ++ConstantBusUseCount; 3059 } 3060 3061 SmallDenseSet<unsigned> SGPRsUsed; 3062 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3063 if (SGPRUsed != AMDGPU::NoRegister) { 3064 SGPRsUsed.insert(SGPRUsed); 3065 ++ConstantBusUseCount; 3066 } 3067 3068 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3069 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3070 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3071 3072 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3073 3074 for (int OpIdx : OpIndices) { 3075 if (OpIdx == -1) break; 3076 3077 const MCOperand &MO = Inst.getOperand(OpIdx); 3078 if (usesConstantBus(Inst, OpIdx)) { 3079 if (MO.isReg()) { 3080 LastSGPR = mc2PseudoReg(MO.getReg()); 3081 // Pairs of registers with a partial intersections like these 3082 // s0, s[0:1] 3083 // flat_scratch_lo, flat_scratch 3084 // flat_scratch_lo, flat_scratch_hi 3085 // are theoretically valid but they are disabled anyway. 3086 // Note that this code mimics SIInstrInfo::verifyInstruction 3087 if (!SGPRsUsed.count(LastSGPR)) { 3088 SGPRsUsed.insert(LastSGPR); 3089 ++ConstantBusUseCount; 3090 } 3091 } else { // Expression or a literal 3092 3093 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3094 continue; // special operand like VINTERP attr_chan 3095 3096 // An instruction may use only one literal. 3097 // This has been validated on the previous step. 3098 // See validateVOP3Literal. 3099 // This literal may be used as more than one operand. 3100 // If all these operands are of the same size, 3101 // this literal counts as one scalar value. 3102 // Otherwise it counts as 2 scalar values. 3103 // See "GFX10 Shader Programming", section 3.6.2.3. 3104 3105 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3106 if (Size < 4) Size = 4; 3107 3108 if (NumLiterals == 0) { 3109 NumLiterals = 1; 3110 LiteralSize = Size; 3111 } else if (LiteralSize != Size) { 3112 NumLiterals = 2; 3113 } 3114 } 3115 } 3116 } 3117 } 3118 ConstantBusUseCount += NumLiterals; 3119 3120 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3121 return true; 3122 3123 SMLoc LitLoc = getLitLoc(Operands); 3124 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3125 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3126 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3127 return false; 3128 } 3129 3130 bool 3131 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3132 const OperandVector &Operands) { 3133 const unsigned Opcode = Inst.getOpcode(); 3134 const MCInstrDesc &Desc = MII.get(Opcode); 3135 3136 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3137 if (DstIdx == -1 || 3138 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3139 return true; 3140 } 3141 3142 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3143 3144 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3145 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3146 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3147 3148 assert(DstIdx != -1); 3149 const MCOperand &Dst = Inst.getOperand(DstIdx); 3150 assert(Dst.isReg()); 3151 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3152 3153 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3154 3155 for (int SrcIdx : SrcIndices) { 3156 if (SrcIdx == -1) break; 3157 const MCOperand &Src = Inst.getOperand(SrcIdx); 3158 if (Src.isReg()) { 3159 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3160 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3161 Error(getRegLoc(SrcReg, Operands), 3162 "destination must be different than all sources"); 3163 return false; 3164 } 3165 } 3166 } 3167 3168 return true; 3169 } 3170 3171 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3172 3173 const unsigned Opc = Inst.getOpcode(); 3174 const MCInstrDesc &Desc = MII.get(Opc); 3175 3176 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3177 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3178 assert(ClampIdx != -1); 3179 return Inst.getOperand(ClampIdx).getImm() == 0; 3180 } 3181 3182 return true; 3183 } 3184 3185 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3186 3187 const unsigned Opc = Inst.getOpcode(); 3188 const MCInstrDesc &Desc = MII.get(Opc); 3189 3190 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3191 return true; 3192 3193 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3194 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3195 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3196 3197 assert(VDataIdx != -1); 3198 3199 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3200 return true; 3201 3202 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3203 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3204 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3205 if (DMask == 0) 3206 DMask = 1; 3207 3208 unsigned DataSize = 3209 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3210 if (hasPackedD16()) { 3211 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3212 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3213 DataSize = (DataSize + 1) / 2; 3214 } 3215 3216 return (VDataSize / 4) == DataSize + TFESize; 3217 } 3218 3219 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3220 const unsigned Opc = Inst.getOpcode(); 3221 const MCInstrDesc &Desc = MII.get(Opc); 3222 3223 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3224 return true; 3225 3226 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3227 3228 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3229 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3230 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3231 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3232 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3233 3234 assert(VAddr0Idx != -1); 3235 assert(SrsrcIdx != -1); 3236 assert(SrsrcIdx > VAddr0Idx); 3237 3238 if (DimIdx == -1) 3239 return true; // intersect_ray 3240 3241 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3242 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3243 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3244 unsigned VAddrSize = 3245 IsNSA ? SrsrcIdx - VAddr0Idx 3246 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3247 3248 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3249 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3250 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3251 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3252 if (!IsNSA) { 3253 if (AddrSize > 8) 3254 AddrSize = 16; 3255 else if (AddrSize > 4) 3256 AddrSize = 8; 3257 } 3258 3259 return VAddrSize == AddrSize; 3260 } 3261 3262 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3263 3264 const unsigned Opc = Inst.getOpcode(); 3265 const MCInstrDesc &Desc = MII.get(Opc); 3266 3267 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3268 return true; 3269 if (!Desc.mayLoad() || !Desc.mayStore()) 3270 return true; // Not atomic 3271 3272 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3273 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3274 3275 // This is an incomplete check because image_atomic_cmpswap 3276 // may only use 0x3 and 0xf while other atomic operations 3277 // may use 0x1 and 0x3. However these limitations are 3278 // verified when we check that dmask matches dst size. 3279 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3280 } 3281 3282 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3283 3284 const unsigned Opc = Inst.getOpcode(); 3285 const MCInstrDesc &Desc = MII.get(Opc); 3286 3287 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3288 return true; 3289 3290 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3291 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3292 3293 // GATHER4 instructions use dmask in a different fashion compared to 3294 // other MIMG instructions. The only useful DMASK values are 3295 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3296 // (red,red,red,red) etc.) The ISA document doesn't mention 3297 // this. 
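  // For example, dmask:0x2 selects the green channel; dmask values with more
  // than one bit set are rejected here.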
3298 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3299 } 3300 3301 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3302 { 3303 switch (Opcode) { 3304 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3305 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3306 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3307 return true; 3308 default: 3309 return false; 3310 } 3311 } 3312 3313 // movrels* opcodes should only allow VGPRS as src0. 3314 // This is specified in .td description for vop1/vop3, 3315 // but sdwa is handled differently. See isSDWAOperand. 3316 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3317 const OperandVector &Operands) { 3318 3319 const unsigned Opc = Inst.getOpcode(); 3320 const MCInstrDesc &Desc = MII.get(Opc); 3321 3322 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3323 return true; 3324 3325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3326 assert(Src0Idx != -1); 3327 3328 SMLoc ErrLoc; 3329 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3330 if (Src0.isReg()) { 3331 auto Reg = mc2PseudoReg(Src0.getReg()); 3332 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3333 if (!isSGPR(Reg, TRI)) 3334 return true; 3335 ErrLoc = getRegLoc(Reg, Operands); 3336 } else { 3337 ErrLoc = getConstLoc(Operands); 3338 } 3339 3340 Error(ErrLoc, "source operand must be a VGPR"); 3341 return false; 3342 } 3343 3344 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3345 const OperandVector &Operands) { 3346 3347 const unsigned Opc = Inst.getOpcode(); 3348 3349 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3350 return true; 3351 3352 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3353 assert(Src0Idx != -1); 3354 3355 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3356 if (!Src0.isReg()) 3357 return true; 3358 3359 auto Reg = mc2PseudoReg(Src0.getReg()); 3360 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3361 if (isSGPR(Reg, TRI)) { 3362 Error(getRegLoc(Reg, Operands), 3363 "source operand must be either a VGPR or an inline constant"); 3364 return false; 3365 } 3366 3367 return true; 3368 } 3369 3370 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3371 switch (Inst.getOpcode()) { 3372 default: 3373 return true; 3374 case V_DIV_SCALE_F32_gfx6_gfx7: 3375 case V_DIV_SCALE_F32_vi: 3376 case V_DIV_SCALE_F32_gfx10: 3377 case V_DIV_SCALE_F64_gfx6_gfx7: 3378 case V_DIV_SCALE_F64_vi: 3379 case V_DIV_SCALE_F64_gfx10: 3380 break; 3381 } 3382 3383 // TODO: Check that src0 = src1 or src2. 
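  // ABS source modifiers are rejected below; for example, an input such as
  //   v_div_scale_f32 v0, vcc, |v1|, v2, v3
  // should fail validation (the operand syntax here is illustrative only).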
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case
AMDGPU::V_ASHRREV_I32_e64: 3500 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3501 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3502 case AMDGPU::V_ASHRREV_I32_e32_vi: 3503 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3504 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3505 case AMDGPU::V_ASHRREV_I32_e64_vi: 3506 3507 case AMDGPU::V_LSHLREV_B32_e32: 3508 case AMDGPU::V_LSHLREV_B32_e64: 3509 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3510 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3511 case AMDGPU::V_LSHLREV_B32_e32_vi: 3512 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3513 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3514 case AMDGPU::V_LSHLREV_B32_e64_vi: 3515 3516 case AMDGPU::V_LSHLREV_B16_e32: 3517 case AMDGPU::V_LSHLREV_B16_e64: 3518 case AMDGPU::V_LSHLREV_B16_e32_vi: 3519 case AMDGPU::V_LSHLREV_B16_e64_vi: 3520 case AMDGPU::V_LSHLREV_B16_gfx10: 3521 3522 case AMDGPU::V_LSHRREV_B16_e32: 3523 case AMDGPU::V_LSHRREV_B16_e64: 3524 case AMDGPU::V_LSHRREV_B16_e32_vi: 3525 case AMDGPU::V_LSHRREV_B16_e64_vi: 3526 case AMDGPU::V_LSHRREV_B16_gfx10: 3527 3528 case AMDGPU::V_ASHRREV_I16_e32: 3529 case AMDGPU::V_ASHRREV_I16_e64: 3530 case AMDGPU::V_ASHRREV_I16_e32_vi: 3531 case AMDGPU::V_ASHRREV_I16_e64_vi: 3532 case AMDGPU::V_ASHRREV_I16_gfx10: 3533 3534 case AMDGPU::V_LSHLREV_B64_e64: 3535 case AMDGPU::V_LSHLREV_B64_gfx10: 3536 case AMDGPU::V_LSHLREV_B64_vi: 3537 3538 case AMDGPU::V_LSHRREV_B64_e64: 3539 case AMDGPU::V_LSHRREV_B64_gfx10: 3540 case AMDGPU::V_LSHRREV_B64_vi: 3541 3542 case AMDGPU::V_ASHRREV_I64_e64: 3543 case AMDGPU::V_ASHRREV_I64_gfx10: 3544 case AMDGPU::V_ASHRREV_I64_vi: 3545 3546 case AMDGPU::V_PK_LSHLREV_B16: 3547 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3548 case AMDGPU::V_PK_LSHLREV_B16_vi: 3549 3550 case AMDGPU::V_PK_LSHRREV_B16: 3551 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3552 case AMDGPU::V_PK_LSHRREV_B16_vi: 3553 case AMDGPU::V_PK_ASHRREV_I16: 3554 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3555 case AMDGPU::V_PK_ASHRREV_I16_vi: 3556 return true; 3557 default: 3558 return false; 3559 } 3560 } 3561 3562 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3563 3564 using namespace SIInstrFlags; 3565 const unsigned Opcode = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opcode); 3567 3568 // lds_direct register is defined so that it can be used 3569 // with 9-bit operands only. Ignore encodings which do not accept these. 3570 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3571 return true; 3572 3573 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3574 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3575 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3576 3577 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3578 3579 // lds_direct cannot be specified as either src1 or src2. 3580 for (int SrcIdx : SrcIndices) { 3581 if (SrcIdx == -1) break; 3582 const MCOperand &Src = Inst.getOperand(SrcIdx); 3583 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3584 return false; 3585 } 3586 } 3587 3588 if (Src0Idx == -1) 3589 return true; 3590 3591 const MCOperand &Src = Inst.getOperand(Src0Idx); 3592 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3593 return true; 3594 3595 // lds_direct is specified as src0. Check additional limitations. 
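// lds_direct in src0 is additionally rejected for SDWA encodings and for the
// *rev opcodes listed in IsRevOpcode(), whose source roles are swapped
// relative to the non-rev instruction.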
3596 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3597 } 3598 3599 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3600 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3601 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3602 if (Op.isFlatOffset()) 3603 return Op.getStartLoc(); 3604 } 3605 return getLoc(); 3606 } 3607 3608 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3609 const OperandVector &Operands) { 3610 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3611 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3612 return true; 3613 3614 auto Opcode = Inst.getOpcode(); 3615 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3616 assert(OpNum != -1); 3617 3618 const auto &Op = Inst.getOperand(OpNum); 3619 if (!hasFlatOffsets() && Op.getImm() != 0) { 3620 Error(getFlatOffsetLoc(Operands), 3621 "flat offset modifier is not supported on this GPU"); 3622 return false; 3623 } 3624 3625 // For FLAT segment the offset must be positive; 3626 // MSB is ignored and forced to zero. 3627 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3628 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3629 if (!isIntN(OffsetSize, Op.getImm())) { 3630 Error(getFlatOffsetLoc(Operands), 3631 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3632 return false; 3633 } 3634 } else { 3635 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3636 if (!isUIntN(OffsetSize, Op.getImm())) { 3637 Error(getFlatOffsetLoc(Operands), 3638 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3639 return false; 3640 } 3641 } 3642 3643 return true; 3644 } 3645 3646 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3647 // Start with second operand because SMEM Offset cannot be dst or src0. 3648 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3649 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3650 if (Op.isSMEMOffset()) 3651 return Op.getStartLoc(); 3652 } 3653 return getLoc(); 3654 } 3655 3656 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3657 const OperandVector &Operands) { 3658 if (isCI() || isSI()) 3659 return true; 3660 3661 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3662 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3663 return true; 3664 3665 auto Opcode = Inst.getOpcode(); 3666 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3667 if (OpNum == -1) 3668 return true; 3669 3670 const auto &Op = Inst.getOperand(OpNum); 3671 if (!Op.isImm()) 3672 return true; 3673 3674 uint64_t Offset = Op.getImm(); 3675 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3676 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3677 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3678 return true; 3679 3680 Error(getSMEMOffsetLoc(Operands), 3681 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3682 "expected a 21-bit signed offset"); 3683 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3688 unsigned Opcode = Inst.getOpcode(); 3689 const MCInstrDesc &Desc = MII.get(Opcode); 3690 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3691 return true; 3692 3693 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3694 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3695 3696 const int OpIndices[] = { Src0Idx, Src1Idx }; 3697 3698 unsigned NumExprs = 0; 3699 unsigned NumLiterals = 0; 3700 uint32_t LiteralValue; 3701 3702 for (int OpIdx : OpIndices) { 3703 if (OpIdx == -1) break; 3704 3705 const MCOperand &MO = Inst.getOperand(OpIdx); 3706 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3707 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3708 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3709 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3710 if (NumLiterals == 0 || LiteralValue != Value) { 3711 LiteralValue = Value; 3712 ++NumLiterals; 3713 } 3714 } else if (MO.isExpr()) { 3715 ++NumExprs; 3716 } 3717 } 3718 } 3719 3720 return NumLiterals + NumExprs <= 1; 3721 } 3722 3723 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3724 const unsigned Opc = Inst.getOpcode(); 3725 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3726 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3727 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3728 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3729 3730 if (OpSel & ~3) 3731 return false; 3732 } 3733 return true; 3734 } 3735 3736 // Check if VCC register matches wavefront size 3737 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3738 auto FB = getFeatureBits(); 3739 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3740 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3741 } 3742 3743 // VOP3 literal is only allowed in GFX10+ and only one can be used 3744 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3745 const OperandVector &Operands) { 3746 unsigned Opcode = Inst.getOpcode(); 3747 const MCInstrDesc &Desc = MII.get(Opcode); 3748 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3749 return true; 3750 3751 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3752 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3753 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3754 3755 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3756 3757 unsigned NumExprs = 0; 3758 unsigned NumLiterals = 0; 3759 uint32_t LiteralValue; 3760 3761 for (int OpIdx : OpIndices) { 3762 if (OpIdx == -1) break; 3763 3764 const MCOperand &MO = Inst.getOperand(OpIdx); 3765 if (!MO.isImm() && !MO.isExpr()) 3766 continue; 3767 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3768 continue; 3769 3770 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3771 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3772 Error(getConstLoc(Operands), 3773 "inline constants are not allowed for this operand"); 3774 return false; 3775 } 3776 3777 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3778 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3779 if (NumLiterals == 0 || LiteralValue != Value) { 3780 LiteralValue = Value; 3781 ++NumLiterals; 3782 } 3783 } else if (MO.isExpr()) { 3784 ++NumExprs; 3785 } 
3786 } 3787 NumLiterals += NumExprs; 3788 3789 if (!NumLiterals) 3790 return true; 3791 3792 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3793 Error(getLitLoc(Operands), "literal operands are not supported"); 3794 return false; 3795 } 3796 3797 if (NumLiterals > 1) { 3798 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3799 return false; 3800 } 3801 3802 return true; 3803 } 3804 3805 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3806 const OperandVector &Operands, 3807 const SMLoc &IDLoc) { 3808 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3809 AMDGPU::OpName::glc1); 3810 if (GLCPos != -1) { 3811 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3812 // in the asm string, and the default value means it is not present. 3813 if (Inst.getOperand(GLCPos).getImm() == -1) { 3814 Error(IDLoc, "instruction must use glc"); 3815 return false; 3816 } 3817 } 3818 3819 return true; 3820 } 3821 3822 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3823 const SMLoc &IDLoc, 3824 const OperandVector &Operands) { 3825 if (!validateLdsDirect(Inst)) { 3826 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 3827 "invalid use of lds_direct"); 3828 return false; 3829 } 3830 if (!validateSOPLiteral(Inst)) { 3831 Error(getLitLoc(Operands), 3832 "only one literal operand is allowed"); 3833 return false; 3834 } 3835 if (!validateVOP3Literal(Inst, Operands)) { 3836 return false; 3837 } 3838 if (!validateConstantBusLimitations(Inst, Operands)) { 3839 return false; 3840 } 3841 if (!validateEarlyClobberLimitations(Inst, Operands)) { 3842 return false; 3843 } 3844 if (!validateIntClampSupported(Inst)) { 3845 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 3846 "integer clamping is not supported on this GPU"); 3847 return false; 3848 } 3849 if (!validateOpSel(Inst)) { 3850 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 3851 "invalid op_sel operand"); 3852 return false; 3853 } 3854 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
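// For MIMG, d16 is an explicit operand; validateMIMGD16() rejects it on SI/CI,
// where d16 image instructions are not available.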
3855 if (!validateMIMGD16(Inst)) { 3856 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 3857 "d16 modifier is not supported on this GPU"); 3858 return false; 3859 } 3860 if (!validateMIMGDim(Inst)) { 3861 Error(IDLoc, "dim modifier is required on this GPU"); 3862 return false; 3863 } 3864 if (!validateMIMGDataSize(Inst)) { 3865 Error(IDLoc, 3866 "image data size does not match dmask and tfe"); 3867 return false; 3868 } 3869 if (!validateMIMGAddrSize(Inst)) { 3870 Error(IDLoc, 3871 "image address size does not match dim and a16"); 3872 return false; 3873 } 3874 if (!validateMIMGAtomicDMask(Inst)) { 3875 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3876 "invalid atomic image dmask"); 3877 return false; 3878 } 3879 if (!validateMIMGGatherDMask(Inst)) { 3880 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3881 "invalid image_gather dmask: only one bit must be set"); 3882 return false; 3883 } 3884 if (!validateMovrels(Inst, Operands)) { 3885 return false; 3886 } 3887 if (!validateFlatOffset(Inst, Operands)) { 3888 return false; 3889 } 3890 if (!validateSMEMOffset(Inst, Operands)) { 3891 return false; 3892 } 3893 if (!validateMAIAccWrite(Inst, Operands)) { 3894 return false; 3895 } 3896 if (!validateDivScale(Inst)) { 3897 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 3898 return false; 3899 } 3900 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3901 return false; 3902 } 3903 3904 return true; 3905 } 3906 3907 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3908 const FeatureBitset &FBS, 3909 unsigned VariantID = 0); 3910 3911 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3912 const FeatureBitset &AvailableFeatures, 3913 unsigned VariantID); 3914 3915 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3916 const FeatureBitset &FBS) { 3917 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3918 } 3919 3920 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3921 const FeatureBitset &FBS, 3922 ArrayRef<unsigned> Variants) { 3923 for (auto Variant : Variants) { 3924 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3925 return true; 3926 } 3927 3928 return false; 3929 } 3930 3931 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3932 const SMLoc &IDLoc) { 3933 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3934 3935 // Check if requested instruction variant is supported. 3936 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3937 return false; 3938 3939 // This instruction is not supported. 3940 // Clear any other pending errors because they are no longer relevant. 3941 getParser().clearPendingErrors(); 3942 3943 // Requested instruction variant is not supported. 3944 // Check if any other variants are supported. 3945 StringRef VariantName = getMatchedVariantName(); 3946 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3947 return Error(IDLoc, 3948 Twine(VariantName, 3949 " variant of this instruction is not supported")); 3950 } 3951 3952 // Finally check if this instruction is supported on any other GPU. 3953 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3954 return Error(IDLoc, "instruction not supported on this GPU"); 3955 } 3956 3957 // Instruction not supported on any GPU. Probably a typo. 
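// Offer a spelling suggestion where possible, e.g. a typo like "v_addd_f32"
// may yield a message along the lines of "invalid instruction, did you mean:
// v_add_f32?" (illustrative; exact wording comes from the generated matcher).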
3958 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
3959 return Error(IDLoc, "invalid instruction" + Suggestion);
3960 }
3961
3962 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
3963 OperandVector &Operands,
3964 MCStreamer &Out,
3965 uint64_t &ErrorInfo,
3966 bool MatchingInlineAsm) {
3967 MCInst Inst;
3968 unsigned Result = Match_Success;
3969 for (auto Variant : getMatchedVariants()) {
3970 uint64_t EI;
3971 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
3972 Variant);
3973 // Match statuses are ordered from least to most specific; keep the most
3974 // specific status seen across all variants as the result:
3975 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
3976 if ((R == Match_Success) ||
3977 (R == Match_PreferE32) ||
3978 (R == Match_MissingFeature && Result != Match_PreferE32) ||
3979 (R == Match_InvalidOperand && Result != Match_MissingFeature
3980 && Result != Match_PreferE32) ||
3981 (R == Match_MnemonicFail && Result != Match_InvalidOperand
3982 && Result != Match_MissingFeature
3983 && Result != Match_PreferE32)) {
3984 Result = R;
3985 ErrorInfo = EI;
3986 }
3987 if (R == Match_Success)
3988 break;
3989 }
3990
3991 if (Result == Match_Success) {
3992 if (!validateInstruction(Inst, IDLoc, Operands)) {
3993 return true;
3994 }
3995 Inst.setLoc(IDLoc);
3996 Out.emitInstruction(Inst, getSTI());
3997 return false;
3998 }
3999
4000 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
4001 if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
4002 return true;
4003 }
4004
4005 switch (Result) {
4006 default: break;
4007 case Match_MissingFeature:
4008 // It has been verified that the specified instruction
4009 // mnemonic is valid. A match was found but it requires
4010 // features which are not supported on this GPU.
4011 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4012 4013 case Match_InvalidOperand: { 4014 SMLoc ErrorLoc = IDLoc; 4015 if (ErrorInfo != ~0ULL) { 4016 if (ErrorInfo >= Operands.size()) { 4017 return Error(IDLoc, "too few operands for instruction"); 4018 } 4019 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4020 if (ErrorLoc == SMLoc()) 4021 ErrorLoc = IDLoc; 4022 } 4023 return Error(ErrorLoc, "invalid operand for instruction"); 4024 } 4025 4026 case Match_PreferE32: 4027 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4028 "should be encoded as e32"); 4029 case Match_MnemonicFail: 4030 llvm_unreachable("Invalid instructions should have been handled already"); 4031 } 4032 llvm_unreachable("Implement any new match types added!"); 4033 } 4034 4035 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4036 int64_t Tmp = -1; 4037 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4038 return true; 4039 } 4040 if (getParser().parseAbsoluteExpression(Tmp)) { 4041 return true; 4042 } 4043 Ret = static_cast<uint32_t>(Tmp); 4044 return false; 4045 } 4046 4047 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4048 uint32_t &Minor) { 4049 if (ParseAsAbsoluteExpression(Major)) 4050 return TokError("invalid major version"); 4051 4052 if (!trySkipToken(AsmToken::Comma)) 4053 return TokError("minor version number required, comma expected"); 4054 4055 if (ParseAsAbsoluteExpression(Minor)) 4056 return TokError("invalid minor version"); 4057 4058 return false; 4059 } 4060 4061 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4062 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4063 return TokError("directive only supported for amdgcn architecture"); 4064 4065 std::string Target; 4066 4067 SMLoc TargetStart = getLoc(); 4068 if (getParser().parseEscapedString(Target)) 4069 return true; 4070 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4071 4072 std::string ExpectedTarget; 4073 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4074 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4075 4076 if (Target != ExpectedTargetOS.str()) 4077 return Error(TargetRange.Start, "target must match options", TargetRange); 4078 4079 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4080 return false; 4081 } 4082 4083 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4084 return Error(Range.Start, "value out of range", Range); 4085 } 4086 4087 bool AMDGPUAsmParser::calculateGPRBlocks( 4088 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4089 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4090 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4091 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4092 // TODO(scott.linder): These calculations are duplicated from 4093 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
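// A "block" is the granulated register count encoded into COMPUTE_PGM_RSRC1:
// roughly the register usage rounded up to the allocation granule, divided by
// the granule size, minus one (see IsaInfo::getNumVGPRBlocks/getNumSGPRBlocks).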
4094 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4095 4096 unsigned NumVGPRs = NextFreeVGPR; 4097 unsigned NumSGPRs = NextFreeSGPR; 4098 4099 if (Version.Major >= 10) 4100 NumSGPRs = 0; 4101 else { 4102 unsigned MaxAddressableNumSGPRs = 4103 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4104 4105 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4106 NumSGPRs > MaxAddressableNumSGPRs) 4107 return OutOfRangeError(SGPRRange); 4108 4109 NumSGPRs += 4110 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4111 4112 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4113 NumSGPRs > MaxAddressableNumSGPRs) 4114 return OutOfRangeError(SGPRRange); 4115 4116 if (Features.test(FeatureSGPRInitBug)) 4117 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4118 } 4119 4120 VGPRBlocks = 4121 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4122 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4123 4124 return false; 4125 } 4126 4127 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4128 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4129 return TokError("directive only supported for amdgcn architecture"); 4130 4131 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4132 return TokError("directive only supported for amdhsa OS"); 4133 4134 StringRef KernelName; 4135 if (getParser().parseIdentifier(KernelName)) 4136 return true; 4137 4138 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4139 4140 StringSet<> Seen; 4141 4142 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4143 4144 SMRange VGPRRange; 4145 uint64_t NextFreeVGPR = 0; 4146 SMRange SGPRRange; 4147 uint64_t NextFreeSGPR = 0; 4148 unsigned UserSGPRCount = 0; 4149 bool ReserveVCC = true; 4150 bool ReserveFlatScr = true; 4151 bool ReserveXNACK = hasXNACK(); 4152 Optional<bool> EnableWavefrontSize32; 4153 4154 while (true) { 4155 while (trySkipToken(AsmToken::EndOfStatement)); 4156 4157 StringRef ID; 4158 SMRange IDRange = getTok().getLocRange(); 4159 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4160 return true; 4161 4162 if (ID == ".end_amdhsa_kernel") 4163 break; 4164 4165 if (Seen.find(ID) != Seen.end()) 4166 return TokError(".amdhsa_ directives cannot be repeated"); 4167 Seen.insert(ID); 4168 4169 SMLoc ValStart = getLoc(); 4170 int64_t IVal; 4171 if (getParser().parseAbsoluteExpression(IVal)) 4172 return true; 4173 SMLoc ValEnd = getLoc(); 4174 SMRange ValRange = SMRange(ValStart, ValEnd); 4175 4176 if (IVal < 0) 4177 return OutOfRangeError(ValRange); 4178 4179 uint64_t Val = IVal; 4180 4181 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4182 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4183 return OutOfRangeError(RANGE); \ 4184 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4185 4186 if (ID == ".amdhsa_group_segment_fixed_size") { 4187 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4188 return OutOfRangeError(ValRange); 4189 KD.group_segment_fixed_size = Val; 4190 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4191 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4192 return OutOfRangeError(ValRange); 4193 KD.private_segment_fixed_size = Val; 4194 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4195 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4196 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4197 Val, ValRange); 4198 if (Val) 4199 UserSGPRCount += 4; 4200 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4201 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4202 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4203 ValRange); 4204 if (Val) 4205 UserSGPRCount += 2; 4206 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4207 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4208 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4209 ValRange); 4210 if (Val) 4211 UserSGPRCount += 2; 4212 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4213 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4214 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4215 Val, ValRange); 4216 if (Val) 4217 UserSGPRCount += 2; 4218 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4219 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4220 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4221 ValRange); 4222 if (Val) 4223 UserSGPRCount += 2; 4224 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4225 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4226 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4227 ValRange); 4228 if (Val) 4229 UserSGPRCount += 2; 4230 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4231 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4232 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4233 Val, ValRange); 4234 if (Val) 4235 UserSGPRCount += 1; 4236 } else if (ID == ".amdhsa_wavefront_size32") { 4237 if (IVersion.Major < 10) 4238 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4239 EnableWavefrontSize32 = Val; 4240 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4241 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4242 Val, ValRange); 4243 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4244 PARSE_BITS_ENTRY( 4245 KD.compute_pgm_rsrc2, 4246 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4247 ValRange); 4248 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4249 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4250 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4251 ValRange); 4252 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4253 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4254 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4255 ValRange); 4256 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4257 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4258 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4259 ValRange); 4260 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4261 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4262 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4263 ValRange); 4264 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4265 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4266 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4267 ValRange); 4268 } else if (ID == ".amdhsa_next_free_vgpr") { 4269 VGPRRange = ValRange; 4270 NextFreeVGPR = Val; 4271 } else if (ID == ".amdhsa_next_free_sgpr") { 4272 SGPRRange = ValRange; 4273 NextFreeSGPR = Val; 4274 } else if (ID == ".amdhsa_reserve_vcc") { 4275 if (!isUInt<1>(Val)) 4276 return OutOfRangeError(ValRange); 4277 ReserveVCC = Val; 4278 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4279 if (IVersion.Major < 7) 4280 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4281 if (!isUInt<1>(Val)) 4282 return OutOfRangeError(ValRange); 4283 ReserveFlatScr = Val; 4284 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4285 if (IVersion.Major < 8) 4286 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4287 if (!isUInt<1>(Val)) 4288 return OutOfRangeError(ValRange); 4289 ReserveXNACK = Val; 4290 } else if (ID == 
".amdhsa_float_round_mode_32") { 4291 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4292 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4293 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4294 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4295 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4296 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4298 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4299 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4300 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4301 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4302 ValRange); 4303 } else if (ID == ".amdhsa_dx10_clamp") { 4304 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4305 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4306 } else if (ID == ".amdhsa_ieee_mode") { 4307 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4308 Val, ValRange); 4309 } else if (ID == ".amdhsa_fp16_overflow") { 4310 if (IVersion.Major < 9) 4311 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4312 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4313 ValRange); 4314 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4315 if (IVersion.Major < 10) 4316 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4317 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4318 ValRange); 4319 } else if (ID == ".amdhsa_memory_ordered") { 4320 if (IVersion.Major < 10) 4321 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4322 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4323 ValRange); 4324 } else if (ID == ".amdhsa_forward_progress") { 4325 if (IVersion.Major < 10) 4326 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4327 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4328 ValRange); 4329 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4330 PARSE_BITS_ENTRY( 4331 KD.compute_pgm_rsrc2, 4332 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4333 ValRange); 4334 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4335 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4336 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4337 Val, ValRange); 4338 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4339 PARSE_BITS_ENTRY( 4340 KD.compute_pgm_rsrc2, 4341 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4342 ValRange); 4343 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4344 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4345 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4346 Val, ValRange); 4347 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4348 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4349 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4350 Val, ValRange); 4351 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4352 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4353 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4354 Val, ValRange); 4355 } else if (ID == ".amdhsa_exception_int_div_zero") { 4356 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4357 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4358 Val, ValRange); 4359 } else { 4360 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4361 } 4362 4363 #undef PARSE_BITS_ENTRY 4364 } 4365 4366 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4367 return TokError(".amdhsa_next_free_vgpr directive is required"); 4368 4369 
if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4370 return TokError(".amdhsa_next_free_sgpr directive is required"); 4371 4372 unsigned VGPRBlocks; 4373 unsigned SGPRBlocks; 4374 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4375 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4376 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4377 SGPRBlocks)) 4378 return true; 4379 4380 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4381 VGPRBlocks)) 4382 return OutOfRangeError(VGPRRange); 4383 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4384 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4385 4386 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4387 SGPRBlocks)) 4388 return OutOfRangeError(SGPRRange); 4389 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4390 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4391 SGPRBlocks); 4392 4393 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4394 return TokError("too many user SGPRs enabled"); 4395 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4396 UserSGPRCount); 4397 4398 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4399 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4400 ReserveFlatScr, ReserveXNACK); 4401 return false; 4402 } 4403 4404 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4405 uint32_t Major; 4406 uint32_t Minor; 4407 4408 if (ParseDirectiveMajorMinor(Major, Minor)) 4409 return true; 4410 4411 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4412 return false; 4413 } 4414 4415 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4416 uint32_t Major; 4417 uint32_t Minor; 4418 uint32_t Stepping; 4419 StringRef VendorName; 4420 StringRef ArchName; 4421 4422 // If this directive has no arguments, then use the ISA version for the 4423 // targeted GPU. 4424 if (isToken(AsmToken::EndOfStatement)) { 4425 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4426 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4427 ISA.Stepping, 4428 "AMD", "AMDGPU"); 4429 return false; 4430 } 4431 4432 if (ParseDirectiveMajorMinor(Major, Minor)) 4433 return true; 4434 4435 if (!trySkipToken(AsmToken::Comma)) 4436 return TokError("stepping version number required, comma expected"); 4437 4438 if (ParseAsAbsoluteExpression(Stepping)) 4439 return TokError("invalid stepping version"); 4440 4441 if (!trySkipToken(AsmToken::Comma)) 4442 return TokError("vendor name required, comma expected"); 4443 4444 if (!parseString(VendorName, "invalid vendor name")) 4445 return true; 4446 4447 if (!trySkipToken(AsmToken::Comma)) 4448 return TokError("arch name required, comma expected"); 4449 4450 if (!parseString(ArchName, "invalid arch name")) 4451 return true; 4452 4453 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4454 VendorName, ArchName); 4455 return false; 4456 } 4457 4458 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4459 amd_kernel_code_t &Header) { 4460 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4461 // assembly for backwards compatibility. 
4462 if (ID == "max_scratch_backing_memory_byte_size") { 4463 Parser.eatToEndOfStatement(); 4464 return false; 4465 } 4466 4467 SmallString<40> ErrStr; 4468 raw_svector_ostream Err(ErrStr); 4469 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4470 return TokError(Err.str()); 4471 } 4472 Lex(); 4473 4474 if (ID == "enable_wavefront_size32") { 4475 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4476 if (!isGFX10Plus()) 4477 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4478 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4479 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4480 } else { 4481 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4482 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4483 } 4484 } 4485 4486 if (ID == "wavefront_size") { 4487 if (Header.wavefront_size == 5) { 4488 if (!isGFX10Plus()) 4489 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4490 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4491 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4492 } else if (Header.wavefront_size == 6) { 4493 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4494 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4495 } 4496 } 4497 4498 if (ID == "enable_wgp_mode") { 4499 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4500 !isGFX10Plus()) 4501 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4502 } 4503 4504 if (ID == "enable_mem_ordered") { 4505 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4506 !isGFX10Plus()) 4507 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4508 } 4509 4510 if (ID == "enable_fwd_progress") { 4511 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4512 !isGFX10Plus()) 4513 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4514 } 4515 4516 return false; 4517 } 4518 4519 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4520 amd_kernel_code_t Header; 4521 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4522 4523 while (true) { 4524 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4525 // will set the current token to EndOfStatement. 
4526 while(trySkipToken(AsmToken::EndOfStatement)); 4527 4528 StringRef ID; 4529 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4530 return true; 4531 4532 if (ID == ".end_amd_kernel_code_t") 4533 break; 4534 4535 if (ParseAMDKernelCodeTValue(ID, Header)) 4536 return true; 4537 } 4538 4539 getTargetStreamer().EmitAMDKernelCodeT(Header); 4540 4541 return false; 4542 } 4543 4544 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4545 StringRef KernelName; 4546 if (!parseId(KernelName, "expected symbol name")) 4547 return true; 4548 4549 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4550 ELF::STT_AMDGPU_HSA_KERNEL); 4551 4552 KernelScope.initialize(getContext()); 4553 return false; 4554 } 4555 4556 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4557 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4558 return Error(getLoc(), 4559 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4560 "architectures"); 4561 } 4562 4563 auto ISAVersionStringFromASM = getToken().getStringContents(); 4564 4565 std::string ISAVersionStringFromSTI; 4566 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4567 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4568 4569 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4570 return Error(getLoc(), 4571 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4572 "arguments specified through the command line"); 4573 } 4574 4575 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4576 Lex(); 4577 4578 return false; 4579 } 4580 4581 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4582 const char *AssemblerDirectiveBegin; 4583 const char *AssemblerDirectiveEnd; 4584 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4585 isHsaAbiVersion3(&getSTI()) 4586 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4587 HSAMD::V3::AssemblerDirectiveEnd) 4588 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4589 HSAMD::AssemblerDirectiveEnd); 4590 4591 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4592 return Error(getLoc(), 4593 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4594 "not available on non-amdhsa OSes")).str()); 4595 } 4596 4597 std::string HSAMetadataString; 4598 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4599 HSAMetadataString)) 4600 return true; 4601 4602 if (isHsaAbiVersion3(&getSTI())) { 4603 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4604 return Error(getLoc(), "invalid HSA metadata"); 4605 } else { 4606 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4607 return Error(getLoc(), "invalid HSA metadata"); 4608 } 4609 4610 return false; 4611 } 4612 4613 /// Common code to parse out a block of text (typically YAML) between start and 4614 /// end directives. 
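/// For example (directive names illustrative), everything between
///   .amdgpu_metadata
///     ...
///   .end_amdgpu_metadata
/// is collected verbatim into CollectString, excluding the directives
/// themselves.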
4615 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4616 const char *AssemblerDirectiveEnd, 4617 std::string &CollectString) { 4618 4619 raw_string_ostream CollectStream(CollectString); 4620 4621 getLexer().setSkipSpace(false); 4622 4623 bool FoundEnd = false; 4624 while (!isToken(AsmToken::Eof)) { 4625 while (isToken(AsmToken::Space)) { 4626 CollectStream << getTokenStr(); 4627 Lex(); 4628 } 4629 4630 if (trySkipId(AssemblerDirectiveEnd)) { 4631 FoundEnd = true; 4632 break; 4633 } 4634 4635 CollectStream << Parser.parseStringToEndOfStatement() 4636 << getContext().getAsmInfo()->getSeparatorString(); 4637 4638 Parser.eatToEndOfStatement(); 4639 } 4640 4641 getLexer().setSkipSpace(true); 4642 4643 if (isToken(AsmToken::Eof) && !FoundEnd) { 4644 return TokError(Twine("expected directive ") + 4645 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4646 } 4647 4648 CollectStream.flush(); 4649 return false; 4650 } 4651 4652 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4653 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4654 std::string String; 4655 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4656 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4657 return true; 4658 4659 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4660 if (!PALMetadata->setFromString(String)) 4661 return Error(getLoc(), "invalid PAL metadata"); 4662 return false; 4663 } 4664 4665 /// Parse the assembler directive for old linear-format PAL metadata. 4666 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4667 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4668 return Error(getLoc(), 4669 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4670 "not available on non-amdpal OSes")).str()); 4671 } 4672 4673 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4674 PALMetadata->setLegacy(); 4675 for (;;) { 4676 uint32_t Key, Value; 4677 if (ParseAsAbsoluteExpression(Key)) { 4678 return TokError(Twine("invalid value in ") + 4679 Twine(PALMD::AssemblerDirective)); 4680 } 4681 if (!trySkipToken(AsmToken::Comma)) { 4682 return TokError(Twine("expected an even number of values in ") + 4683 Twine(PALMD::AssemblerDirective)); 4684 } 4685 if (ParseAsAbsoluteExpression(Value)) { 4686 return TokError(Twine("invalid value in ") + 4687 Twine(PALMD::AssemblerDirective)); 4688 } 4689 PALMetadata->setRegister(Key, Value); 4690 if (!trySkipToken(AsmToken::Comma)) 4691 break; 4692 } 4693 return false; 4694 } 4695 4696 /// ParseDirectiveAMDGPULDS 4697 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4698 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4699 if (getParser().checkForValidSection()) 4700 return true; 4701 4702 StringRef Name; 4703 SMLoc NameLoc = getLoc(); 4704 if (getParser().parseIdentifier(Name)) 4705 return TokError("expected identifier in directive"); 4706 4707 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4708 if (parseToken(AsmToken::Comma, "expected ','")) 4709 return true; 4710 4711 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4712 4713 int64_t Size; 4714 SMLoc SizeLoc = getLoc(); 4715 if (getParser().parseAbsoluteExpression(Size)) 4716 return true; 4717 if (Size < 0) 4718 return Error(SizeLoc, "size must be non-negative"); 4719 if (Size > LocalMemorySize) 4720 return Error(SizeLoc, "size is too large"); 4721 4722 int64_t Alignment = 4; 4723 if (trySkipToken(AsmToken::Comma)) { 4724 SMLoc AlignLoc = getLoc(); 4725 if 
(getParser().parseAbsoluteExpression(Alignment)) 4726 return true; 4727 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4728 return Error(AlignLoc, "alignment must be a power of two"); 4729 4730 // Alignment larger than the size of LDS is possible in theory, as long 4731 // as the linker manages to place to symbol at address 0, but we do want 4732 // to make sure the alignment fits nicely into a 32-bit integer. 4733 if (Alignment >= 1u << 31) 4734 return Error(AlignLoc, "alignment is too large"); 4735 } 4736 4737 if (parseToken(AsmToken::EndOfStatement, 4738 "unexpected token in '.amdgpu_lds' directive")) 4739 return true; 4740 4741 Symbol->redefineIfPossible(); 4742 if (!Symbol->isUndefined()) 4743 return Error(NameLoc, "invalid symbol redefinition"); 4744 4745 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4746 return false; 4747 } 4748 4749 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4750 StringRef IDVal = DirectiveID.getString(); 4751 4752 if (isHsaAbiVersion3(&getSTI())) { 4753 if (IDVal == ".amdgcn_target") 4754 return ParseDirectiveAMDGCNTarget(); 4755 4756 if (IDVal == ".amdhsa_kernel") 4757 return ParseDirectiveAMDHSAKernel(); 4758 4759 // TODO: Restructure/combine with PAL metadata directive. 4760 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4761 return ParseDirectiveHSAMetadata(); 4762 } else { 4763 if (IDVal == ".hsa_code_object_version") 4764 return ParseDirectiveHSACodeObjectVersion(); 4765 4766 if (IDVal == ".hsa_code_object_isa") 4767 return ParseDirectiveHSACodeObjectISA(); 4768 4769 if (IDVal == ".amd_kernel_code_t") 4770 return ParseDirectiveAMDKernelCodeT(); 4771 4772 if (IDVal == ".amdgpu_hsa_kernel") 4773 return ParseDirectiveAMDGPUHsaKernel(); 4774 4775 if (IDVal == ".amd_amdgpu_isa") 4776 return ParseDirectiveISAVersion(); 4777 4778 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4779 return ParseDirectiveHSAMetadata(); 4780 } 4781 4782 if (IDVal == ".amdgpu_lds") 4783 return ParseDirectiveAMDGPULDS(); 4784 4785 if (IDVal == PALMD::AssemblerDirectiveBegin) 4786 return ParseDirectivePALMetadataBegin(); 4787 4788 if (IDVal == PALMD::AssemblerDirective) 4789 return ParseDirectivePALMetadata(); 4790 4791 return true; 4792 } 4793 4794 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4795 unsigned RegNo) const { 4796 4797 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4798 R.isValid(); ++R) { 4799 if (*R == RegNo) 4800 return isGFX9Plus(); 4801 } 4802 4803 // GFX10 has 2 more SGPRs 104 and 105. 4804 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4805 R.isValid(); ++R) { 4806 if (*R == RegNo) 4807 return hasSGPR104_SGPR105(); 4808 } 4809 4810 switch (RegNo) { 4811 case AMDGPU::SRC_SHARED_BASE: 4812 case AMDGPU::SRC_SHARED_LIMIT: 4813 case AMDGPU::SRC_PRIVATE_BASE: 4814 case AMDGPU::SRC_PRIVATE_LIMIT: 4815 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4816 return isGFX9Plus(); 4817 case AMDGPU::TBA: 4818 case AMDGPU::TBA_LO: 4819 case AMDGPU::TBA_HI: 4820 case AMDGPU::TMA: 4821 case AMDGPU::TMA_LO: 4822 case AMDGPU::TMA_HI: 4823 return !isGFX9Plus(); 4824 case AMDGPU::XNACK_MASK: 4825 case AMDGPU::XNACK_MASK_LO: 4826 case AMDGPU::XNACK_MASK_HI: 4827 return (isVI() || isGFX9()) && hasXNACK(); 4828 case AMDGPU::SGPR_NULL: 4829 return isGFX10Plus(); 4830 default: 4831 break; 4832 } 4833 4834 if (isCI()) 4835 return true; 4836 4837 if (isSI() || isGFX10Plus()) { 4838 // No flat_scr on SI. 
4839 // On GFX10 flat scratch is not a valid register operand and can only be 4840 // accessed with s_setreg/s_getreg. 4841 switch (RegNo) { 4842 case AMDGPU::FLAT_SCR: 4843 case AMDGPU::FLAT_SCR_LO: 4844 case AMDGPU::FLAT_SCR_HI: 4845 return false; 4846 default: 4847 return true; 4848 } 4849 } 4850 4851 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4852 // SI/CI have. 4853 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4854 R.isValid(); ++R) { 4855 if (*R == RegNo) 4856 return hasSGPR102_SGPR103(); 4857 } 4858 4859 return true; 4860 } 4861 4862 OperandMatchResultTy 4863 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4864 OperandMode Mode) { 4865 // Try to parse with a custom parser 4866 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4867 4868 // If we successfully parsed the operand or if there as an error parsing, 4869 // we are done. 4870 // 4871 // If we are parsing after we reach EndOfStatement then this means we 4872 // are appending default values to the Operands list. This is only done 4873 // by custom parser, so we shouldn't continue on to the generic parsing. 4874 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4875 isToken(AsmToken::EndOfStatement)) 4876 return ResTy; 4877 4878 SMLoc RBraceLoc; 4879 SMLoc LBraceLoc = getLoc(); 4880 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 4881 unsigned Prefix = Operands.size(); 4882 4883 for (;;) { 4884 ResTy = parseReg(Operands); 4885 if (ResTy != MatchOperand_Success) 4886 return ResTy; 4887 4888 RBraceLoc = getLoc(); 4889 if (trySkipToken(AsmToken::RBrac)) 4890 break; 4891 4892 if (!trySkipToken(AsmToken::Comma)) 4893 return MatchOperand_ParseFail; 4894 } 4895 4896 if (Operands.size() - Prefix > 1) { 4897 Operands.insert(Operands.begin() + Prefix, 4898 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4899 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 4900 } 4901 4902 return MatchOperand_Success; 4903 } 4904 4905 return parseRegOrImm(Operands); 4906 } 4907 4908 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4909 // Clear any forced encodings from the previous instruction. 4910 setForcedEncodingSize(0); 4911 setForcedDPP(false); 4912 setForcedSDWA(false); 4913 4914 if (Name.endswith("_e64")) { 4915 setForcedEncodingSize(64); 4916 return Name.substr(0, Name.size() - 4); 4917 } else if (Name.endswith("_e32")) { 4918 setForcedEncodingSize(32); 4919 return Name.substr(0, Name.size() - 4); 4920 } else if (Name.endswith("_dpp")) { 4921 setForcedDPP(true); 4922 return Name.substr(0, Name.size() - 4); 4923 } else if (Name.endswith("_sdwa")) { 4924 setForcedSDWA(true); 4925 return Name.substr(0, Name.size() - 5); 4926 } 4927 return Name; 4928 } 4929 4930 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4931 StringRef Name, 4932 SMLoc NameLoc, OperandVector &Operands) { 4933 // Add the instruction mnemonic 4934 Name = parseMnemonicSuffix(Name); 4935 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4936 4937 bool IsMIMG = Name.startswith("image_"); 4938 4939 while (!trySkipToken(AsmToken::EndOfStatement)) { 4940 OperandMode Mode = OperandMode_Default; 4941 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 4942 Mode = OperandMode_NSA; 4943 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4944 4945 // Eat the comma or space if there is one. 
4946 trySkipToken(AsmToken::Comma); 4947 4948 if (Res != MatchOperand_Success) { 4949 checkUnsupportedInstruction(Name, NameLoc); 4950 if (!Parser.hasPendingError()) { 4951 // FIXME: use real operand location rather than the current location. 4952 StringRef Msg = 4953 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4954 "not a valid operand."; 4955 Error(getLoc(), Msg); 4956 } 4957 while (!trySkipToken(AsmToken::EndOfStatement)) { 4958 lex(); 4959 } 4960 return true; 4961 } 4962 } 4963 4964 return false; 4965 } 4966 4967 //===----------------------------------------------------------------------===// 4968 // Utility functions 4969 //===----------------------------------------------------------------------===// 4970 4971 OperandMatchResultTy 4972 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4973 4974 if (!trySkipId(Prefix, AsmToken::Colon)) 4975 return MatchOperand_NoMatch; 4976 4977 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4978 } 4979 4980 OperandMatchResultTy 4981 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4982 AMDGPUOperand::ImmTy ImmTy, 4983 bool (*ConvertResult)(int64_t&)) { 4984 SMLoc S = getLoc(); 4985 int64_t Value = 0; 4986 4987 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4988 if (Res != MatchOperand_Success) 4989 return Res; 4990 4991 if (ConvertResult && !ConvertResult(Value)) { 4992 Error(S, "invalid " + StringRef(Prefix) + " value."); 4993 } 4994 4995 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4996 return MatchOperand_Success; 4997 } 4998 4999 OperandMatchResultTy 5000 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5001 OperandVector &Operands, 5002 AMDGPUOperand::ImmTy ImmTy, 5003 bool (*ConvertResult)(int64_t&)) { 5004 SMLoc S = getLoc(); 5005 if (!trySkipId(Prefix, AsmToken::Colon)) 5006 return MatchOperand_NoMatch; 5007 5008 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5009 return MatchOperand_ParseFail; 5010 5011 unsigned Val = 0; 5012 const unsigned MaxSize = 4; 5013 5014 // FIXME: How to verify the number of elements matches the number of src 5015 // operands? 5016 for (int I = 0; ; ++I) { 5017 int64_t Op; 5018 SMLoc Loc = getLoc(); 5019 if (!parseExpr(Op)) 5020 return MatchOperand_ParseFail; 5021 5022 if (Op != 0 && Op != 1) { 5023 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5024 return MatchOperand_ParseFail; 5025 } 5026 5027 Val |= (Op << I); 5028 5029 if (trySkipToken(AsmToken::RBrac)) 5030 break; 5031 5032 if (I + 1 == MaxSize) { 5033 Error(getLoc(), "expected a closing square bracket"); 5034 return MatchOperand_ParseFail; 5035 } 5036 5037 if (!skipToken(AsmToken::Comma, "expected a comma")) 5038 return MatchOperand_ParseFail; 5039 } 5040 5041 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5042 return MatchOperand_Success; 5043 } 5044 5045 OperandMatchResultTy 5046 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5047 AMDGPUOperand::ImmTy ImmTy) { 5048 int64_t Bit = 0; 5049 SMLoc S = getLoc(); 5050 5051 // We are at the end of the statement, and this is a default argument, so 5052 // use a default value. 
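// Otherwise a named bit is either the bare name (e.g. "glc", which sets the
// bit) or the name prefixed with "no" (e.g. "noglc", which leaves it clear).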
5053 if (!isToken(AsmToken::EndOfStatement)) { 5054 switch(getTokenKind()) { 5055 case AsmToken::Identifier: { 5056 StringRef Tok = getTokenStr(); 5057 if (Tok == Name) { 5058 if (Tok == "r128" && !hasMIMG_R128()) 5059 Error(S, "r128 modifier is not supported on this GPU"); 5060 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5061 Error(S, "a16 modifier is not supported on this GPU"); 5062 Bit = 1; 5063 Parser.Lex(); 5064 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5065 Bit = 0; 5066 Parser.Lex(); 5067 } else { 5068 return MatchOperand_NoMatch; 5069 } 5070 break; 5071 } 5072 default: 5073 return MatchOperand_NoMatch; 5074 } 5075 } 5076 5077 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) 5078 return MatchOperand_ParseFail; 5079 5080 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5081 ImmTy = AMDGPUOperand::ImmTyR128A16; 5082 5083 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5084 return MatchOperand_Success; 5085 } 5086 5087 static void addOptionalImmOperand( 5088 MCInst& Inst, const OperandVector& Operands, 5089 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5090 AMDGPUOperand::ImmTy ImmT, 5091 int64_t Default = 0) { 5092 auto i = OptionalIdx.find(ImmT); 5093 if (i != OptionalIdx.end()) { 5094 unsigned Idx = i->second; 5095 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5096 } else { 5097 Inst.addOperand(MCOperand::createImm(Default)); 5098 } 5099 } 5100 5101 OperandMatchResultTy 5102 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5103 if (!trySkipId(Prefix, AsmToken::Colon)) 5104 return MatchOperand_NoMatch; 5105 5106 return parseId(Value) ? MatchOperand_Success : MatchOperand_ParseFail; 5107 } 5108 5109 //===----------------------------------------------------------------------===// 5110 // MTBUF format 5111 //===----------------------------------------------------------------------===// 5112 5113 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5114 int64_t MaxVal, 5115 int64_t &Fmt) { 5116 int64_t Val; 5117 SMLoc Loc = getLoc(); 5118 5119 auto Res = parseIntWithPrefix(Pref, Val); 5120 if (Res == MatchOperand_ParseFail) 5121 return false; 5122 if (Res == MatchOperand_NoMatch) 5123 return true; 5124 5125 if (Val < 0 || Val > MaxVal) { 5126 Error(Loc, Twine("out of range ", StringRef(Pref))); 5127 return false; 5128 } 5129 5130 Fmt = Val; 5131 return true; 5132 } 5133 5134 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5135 // values to live in a joint format operand in the MCInst encoding. 5136 OperandMatchResultTy 5137 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5138 using namespace llvm::AMDGPU::MTBUFFormat; 5139 5140 int64_t Dfmt = DFMT_UNDEF; 5141 int64_t Nfmt = NFMT_UNDEF; 5142 5143 // dfmt and nfmt can appear in either order, and each is optional. 5144 for (int I = 0; I < 2; ++I) { 5145 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5146 return MatchOperand_ParseFail; 5147 5148 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5149 return MatchOperand_ParseFail; 5150 } 5151 // Skip optional comma between dfmt/nfmt 5152 // but guard against 2 commas following each other. 5153 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5154 !peekToken().is(AsmToken::Comma)) { 5155 trySkipToken(AsmToken::Comma); 5156 } 5157 } 5158 5159 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5160 return MatchOperand_NoMatch; 5161 5162 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5163 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5164 5165 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5166 return MatchOperand_Success; 5167 } 5168 5169 OperandMatchResultTy 5170 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5171 using namespace llvm::AMDGPU::MTBUFFormat; 5172 5173 int64_t Fmt = UFMT_UNDEF; 5174 5175 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5176 return MatchOperand_ParseFail; 5177 5178 if (Fmt == UFMT_UNDEF) 5179 return MatchOperand_NoMatch; 5180 5181 Format = Fmt; 5182 return MatchOperand_Success; 5183 } 5184 5185 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5186 int64_t &Nfmt, 5187 StringRef FormatStr, 5188 SMLoc Loc) { 5189 using namespace llvm::AMDGPU::MTBUFFormat; 5190 int64_t Format; 5191 5192 Format = getDfmt(FormatStr); 5193 if (Format != DFMT_UNDEF) { 5194 Dfmt = Format; 5195 return true; 5196 } 5197 5198 Format = getNfmt(FormatStr, getSTI()); 5199 if (Format != NFMT_UNDEF) { 5200 Nfmt = Format; 5201 return true; 5202 } 5203 5204 Error(Loc, "unsupported format"); 5205 return false; 5206 } 5207 5208 OperandMatchResultTy 5209 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5210 SMLoc FormatLoc, 5211 int64_t &Format) { 5212 using namespace llvm::AMDGPU::MTBUFFormat; 5213 5214 int64_t Dfmt = DFMT_UNDEF; 5215 int64_t Nfmt = NFMT_UNDEF; 5216 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5217 return MatchOperand_ParseFail; 5218 5219 if (trySkipToken(AsmToken::Comma)) { 5220 StringRef Str; 5221 SMLoc Loc = getLoc(); 5222 if (!parseId(Str, "expected a format string") || 5223 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5224 return MatchOperand_ParseFail; 5225 } 5226 if (Dfmt == DFMT_UNDEF) { 5227 Error(Loc, "duplicate numeric format"); 5228 return MatchOperand_ParseFail; 5229 } else if (Nfmt == NFMT_UNDEF) { 5230 Error(Loc, "duplicate data format"); 5231 return MatchOperand_ParseFail; 5232 } 5233 } 5234 5235 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5236 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5237 5238 if (isGFX10Plus()) { 5239 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5240 if (Ufmt == UFMT_UNDEF) { 5241 Error(FormatLoc, "unsupported format"); 5242 return MatchOperand_ParseFail; 5243 } 5244 Format = Ufmt; 5245 } else { 5246 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5247 } 5248 5249 return MatchOperand_Success; 5250 } 5251 5252 OperandMatchResultTy 5253 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5254 SMLoc Loc, 5255 int64_t &Format) { 5256 using namespace llvm::AMDGPU::MTBUFFormat; 5257 5258 auto Id = getUnifiedFormat(FormatStr); 5259 if (Id == UFMT_UNDEF) 5260 return MatchOperand_NoMatch; 5261 5262 if (!isGFX10Plus()) { 5263 Error(Loc, "unified format is not supported on this GPU"); 5264 return MatchOperand_ParseFail; 5265 } 5266 5267 Format = Id; 5268 return MatchOperand_Success; 5269 } 5270 5271 OperandMatchResultTy 5272 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5273 using namespace llvm::AMDGPU::MTBUFFormat; 5274 SMLoc Loc = getLoc(); 5275 5276 if (!parseExpr(Format)) 5277 return MatchOperand_ParseFail; 5278 if (!isValidFormatEncoding(Format, getSTI())) { 5279 Error(Loc, "out of range format"); 5280 return MatchOperand_ParseFail; 5281 } 5282 5283 return MatchOperand_Success; 5284 } 5285 5286 OperandMatchResultTy 5287 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5288 using namespace llvm::AMDGPU::MTBUFFormat; 5289 5290 if (!trySkipId("format", AsmToken::Colon)) 5291 return MatchOperand_NoMatch; 5292 5293 if (trySkipToken(AsmToken::LBrac)) { 5294 StringRef FormatStr; 5295 SMLoc Loc = getLoc(); 5296 if (!parseId(FormatStr, "expected a format string")) 5297 return MatchOperand_ParseFail; 5298 5299 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5300 if (Res == MatchOperand_NoMatch) 5301 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5302 if (Res != MatchOperand_Success) 5303 return Res; 5304 5305 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5306 return MatchOperand_ParseFail; 5307 5308 return MatchOperand_Success; 5309 } 5310 5311 return parseNumericFormat(Format); 5312 } 5313 5314 OperandMatchResultTy 5315 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5316 using namespace llvm::AMDGPU::MTBUFFormat; 5317 5318 int64_t Format = getDefaultFormatEncoding(getSTI()); 5319 OperandMatchResultTy Res; 5320 SMLoc Loc = getLoc(); 5321 5322 // Parse legacy format syntax. 5323 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5324 if (Res == MatchOperand_ParseFail) 5325 return Res; 5326 5327 bool FormatFound = (Res == MatchOperand_Success); 5328 5329 Operands.push_back( 5330 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5331 5332 if (FormatFound) 5333 trySkipToken(AsmToken::Comma); 5334 5335 if (isToken(AsmToken::EndOfStatement)) { 5336 // We are expecting an soffset operand, 5337 // but let matcher handle the error. 5338 return MatchOperand_Success; 5339 } 5340 5341 // Parse soffset. 
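// Illustrative syntax (assumed examples, not taken from this file): the legacy split form
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0
// and the GFX10+ unified form
//   tbuffer_load_format_x v0, off, s[0:3], format:[BUF_FMT_32_FLOAT], 0
// As the code below shows, the format modifier may also appear after the soffset
// operand that is parsed next.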
5342 Res = parseRegOrImm(Operands); 5343 if (Res != MatchOperand_Success) 5344 return Res; 5345 5346 trySkipToken(AsmToken::Comma); 5347 5348 if (!FormatFound) { 5349 Res = parseSymbolicOrNumericFormat(Format); 5350 if (Res == MatchOperand_ParseFail) 5351 return Res; 5352 if (Res == MatchOperand_Success) { 5353 auto Size = Operands.size(); 5354 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5355 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5356 Op.setImm(Format); 5357 } 5358 return MatchOperand_Success; 5359 } 5360 5361 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5362 Error(getLoc(), "duplicate format"); 5363 return MatchOperand_ParseFail; 5364 } 5365 return MatchOperand_Success; 5366 } 5367 5368 //===----------------------------------------------------------------------===// 5369 // ds 5370 //===----------------------------------------------------------------------===// 5371 5372 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5373 const OperandVector &Operands) { 5374 OptionalImmIndexMap OptionalIdx; 5375 5376 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5377 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5378 5379 // Add the register arguments 5380 if (Op.isReg()) { 5381 Op.addRegOperands(Inst, 1); 5382 continue; 5383 } 5384 5385 // Handle optional arguments 5386 OptionalIdx[Op.getImmTy()] = i; 5387 } 5388 5389 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5390 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5391 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5392 5393 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5394 } 5395 5396 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5397 bool IsGdsHardcoded) { 5398 OptionalImmIndexMap OptionalIdx; 5399 5400 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5401 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5402 5403 // Add the register arguments 5404 if (Op.isReg()) { 5405 Op.addRegOperands(Inst, 1); 5406 continue; 5407 } 5408 5409 if (Op.isToken() && Op.getToken() == "gds") { 5410 IsGdsHardcoded = true; 5411 continue; 5412 } 5413 5414 // Handle optional arguments 5415 OptionalIdx[Op.getImmTy()] = i; 5416 } 5417 5418 AMDGPUOperand::ImmTy OffsetType = 5419 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5420 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5421 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5422 AMDGPUOperand::ImmTyOffset; 5423 5424 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5425 5426 if (!IsGdsHardcoded) { 5427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5428 } 5429 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5430 } 5431 5432 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5433 OptionalImmIndexMap OptionalIdx; 5434 5435 unsigned OperandIdx[4]; 5436 unsigned EnMask = 0; 5437 int SrcIdx = 0; 5438 5439 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5440 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5441 5442 // Add the register arguments 5443 if (Op.isReg()) { 5444 assert(SrcIdx < 4); 5445 OperandIdx[SrcIdx] = Inst.size(); 5446 Op.addRegOperands(Inst, 1); 5447 ++SrcIdx; 5448 continue; 5449 } 5450 5451 if (Op.isOff()) { 5452 assert(SrcIdx < 4); 5453 OperandIdx[SrcIdx] = Inst.size(); 5454 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5455 ++SrcIdx; 5456 continue; 5457 } 5458 5459 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5460 Op.addImmOperands(Inst, 1); 5461 continue; 5462 } 5463 5464 if (Op.isToken() && Op.getToken() == "done") 5465 continue; 5466 5467 // Handle optional arguments 5468 OptionalIdx[Op.getImmTy()] = i; 5469 } 5470 5471 assert(SrcIdx == 4); 5472 5473 bool Compr = false; 5474 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5475 Compr = true; 5476 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5477 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5478 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5479 } 5480 5481 for (auto i = 0; i < SrcIdx; ++i) { 5482 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5483 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5484 } 5485 } 5486 5487 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5488 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5489 5490 Inst.addOperand(MCOperand::createImm(EnMask)); 5491 } 5492 5493 //===----------------------------------------------------------------------===// 5494 // s_waitcnt 5495 //===----------------------------------------------------------------------===// 5496 5497 static bool 5498 encodeCnt( 5499 const AMDGPU::IsaVersion ISA, 5500 int64_t &IntVal, 5501 int64_t CntVal, 5502 bool Saturate, 5503 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5504 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5505 { 5506 bool Failed = false; 5507 5508 IntVal = encode(ISA, IntVal, CntVal); 5509 if (CntVal != decode(ISA, IntVal)) { 5510 if (Saturate) { 5511 IntVal = encode(ISA, IntVal, -1); 5512 } else { 5513 Failed = true; 5514 } 5515 } 5516 return Failed; 5517 } 5518 5519 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5520 5521 SMLoc CntLoc = getLoc(); 5522 StringRef CntName = getTokenStr(); 5523 5524 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5525 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5526 return false; 5527 5528 int64_t CntVal; 5529 SMLoc ValLoc = getLoc(); 5530 if (!parseExpr(CntVal)) 5531 return false; 5532 5533 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5534 5535 bool Failed = true; 5536 bool Sat = CntName.endswith("_sat"); 5537 5538 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5539 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5540 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5541 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5542 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5543 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5544 } else { 5545 Error(CntLoc, "invalid counter name " + CntName); 5546 return false; 5547 } 5548 5549 if (Failed) { 5550 Error(ValLoc, "too large value for " + CntName); 5551 return false; 5552 } 5553 5554 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5555 return false; 5556 5557 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5558 if (isToken(AsmToken::EndOfStatement)) { 5559 Error(getLoc(), "expected a counter name"); 5560 return false; 5561 } 5562 } 5563 5564 return true; 5565 } 5566 5567 OperandMatchResultTy 5568 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5569 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5570 int64_t Waitcnt = getWaitcntBitMask(ISA); 5571 SMLoc S = getLoc(); 5572 5573 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5574 while (!isToken(AsmToken::EndOfStatement)) { 5575 if (!parseCnt(Waitcnt)) 5576 return MatchOperand_ParseFail; 5577 } 5578 } else { 5579 if (!parseExpr(Waitcnt)) 5580 return MatchOperand_ParseFail; 5581 } 5582 5583 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5584 return MatchOperand_Success; 5585 } 5586 5587 bool 5588 AMDGPUOperand::isSWaitCnt() const { 5589 return isImm(); 5590 } 5591 5592 //===----------------------------------------------------------------------===// 5593 // hwreg 5594 //===----------------------------------------------------------------------===// 5595 5596 bool 5597 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5598 OperandInfoTy &Offset, 5599 
OperandInfoTy &Width) { 5600 using namespace llvm::AMDGPU::Hwreg; 5601 5602 // The register may be specified by name or using a numeric code 5603 HwReg.Loc = getLoc(); 5604 if (isToken(AsmToken::Identifier) && 5605 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5606 HwReg.IsSymbolic = true; 5607 lex(); // skip register name 5608 } else if (!parseExpr(HwReg.Id, "a register name")) { 5609 return false; 5610 } 5611 5612 if (trySkipToken(AsmToken::RParen)) 5613 return true; 5614 5615 // parse optional params 5616 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5617 return false; 5618 5619 Offset.Loc = getLoc(); 5620 if (!parseExpr(Offset.Id)) 5621 return false; 5622 5623 if (!skipToken(AsmToken::Comma, "expected a comma")) 5624 return false; 5625 5626 Width.Loc = getLoc(); 5627 return parseExpr(Width.Id) && 5628 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5629 } 5630 5631 bool 5632 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5633 const OperandInfoTy &Offset, 5634 const OperandInfoTy &Width) { 5635 5636 using namespace llvm::AMDGPU::Hwreg; 5637 5638 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5639 Error(HwReg.Loc, 5640 "specified hardware register is not supported on this GPU"); 5641 return false; 5642 } 5643 if (!isValidHwreg(HwReg.Id)) { 5644 Error(HwReg.Loc, 5645 "invalid code of hardware register: only 6-bit values are legal"); 5646 return false; 5647 } 5648 if (!isValidHwregOffset(Offset.Id)) { 5649 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5650 return false; 5651 } 5652 if (!isValidHwregWidth(Width.Id)) { 5653 Error(Width.Loc, 5654 "invalid bitfield width: only values from 1 to 32 are legal"); 5655 return false; 5656 } 5657 return true; 5658 } 5659 5660 OperandMatchResultTy 5661 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5662 using namespace llvm::AMDGPU::Hwreg; 5663 5664 int64_t ImmVal = 0; 5665 SMLoc Loc = getLoc(); 5666 5667 if (trySkipId("hwreg", AsmToken::LParen)) { 5668 OperandInfoTy HwReg(ID_UNKNOWN_); 5669 OperandInfoTy Offset(OFFSET_DEFAULT_); 5670 OperandInfoTy Width(WIDTH_DEFAULT_); 5671 if (parseHwregBody(HwReg, Offset, Width) && 5672 validateHwreg(HwReg, Offset, Width)) { 5673 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5674 } else { 5675 return MatchOperand_ParseFail; 5676 } 5677 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5678 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5679 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5680 return MatchOperand_ParseFail; 5681 } 5682 } else { 5683 return MatchOperand_ParseFail; 5684 } 5685 5686 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5687 return MatchOperand_Success; 5688 } 5689 5690 bool AMDGPUOperand::isHwreg() const { 5691 return isImmTy(ImmTyHwreg); 5692 } 5693 5694 //===----------------------------------------------------------------------===// 5695 // sendmsg 5696 //===----------------------------------------------------------------------===// 5697 5698 bool 5699 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5700 OperandInfoTy &Op, 5701 OperandInfoTy &Stream) { 5702 using namespace llvm::AMDGPU::SendMsg; 5703 5704 Msg.Loc = getLoc(); 5705 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5706 Msg.IsSymbolic = true; 5707 lex(); // skip message name 5708 } else if (!parseExpr(Msg.Id, "a message name")) { 5709 return false; 5710 } 5711 5712 if (trySkipToken(AsmToken::Comma)) { 5713 Op.IsDefined = true; 
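    // Assumed example of the syntax reaching this point:
    //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
    // The message name has already been consumed; the operation name and an
    // optional stream id are parsed next.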
5714 Op.Loc = getLoc(); 5715 if (isToken(AsmToken::Identifier) && 5716 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5717 lex(); // skip operation name 5718 } else if (!parseExpr(Op.Id, "an operation name")) { 5719 return false; 5720 } 5721 5722 if (trySkipToken(AsmToken::Comma)) { 5723 Stream.IsDefined = true; 5724 Stream.Loc = getLoc(); 5725 if (!parseExpr(Stream.Id)) 5726 return false; 5727 } 5728 } 5729 5730 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5731 } 5732 5733 bool 5734 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5735 const OperandInfoTy &Op, 5736 const OperandInfoTy &Stream) { 5737 using namespace llvm::AMDGPU::SendMsg; 5738 5739 // Validation strictness depends on whether the message is specified 5740 // in a symbolic or in a numeric form. In the latter case 5741 // only the possibility of encoding is checked. 5742 bool Strict = Msg.IsSymbolic; 5743 5744 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5745 Error(Msg.Loc, "invalid message id"); 5746 return false; 5747 } 5748 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5749 if (Op.IsDefined) { 5750 Error(Op.Loc, "message does not support operations"); 5751 } else { 5752 Error(Msg.Loc, "missing message operation"); 5753 } 5754 return false; 5755 } 5756 if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5757 Error(Op.Loc, "invalid operation id"); 5758 return false; 5759 } 5760 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5761 Error(Stream.Loc, "message operation does not support streams"); 5762 return false; 5763 } 5764 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5765 Error(Stream.Loc, "invalid message stream id"); 5766 return false; 5767 } 5768 return true; 5769 } 5770 5771 OperandMatchResultTy 5772 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5773 using namespace llvm::AMDGPU::SendMsg; 5774 5775 int64_t ImmVal = 0; 5776 SMLoc Loc = getLoc(); 5777 5778 if (trySkipId("sendmsg", AsmToken::LParen)) { 5779 OperandInfoTy Msg(ID_UNKNOWN_); 5780 OperandInfoTy Op(OP_NONE_); 5781 OperandInfoTy Stream(STREAM_ID_NONE_); 5782 if (parseSendMsgBody(Msg, Op, Stream) && 5783 validateSendMsg(Msg, Op, Stream)) { 5784 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5785 } else { 5786 return MatchOperand_ParseFail; 5787 } 5788 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 5789 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5790 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5791 return MatchOperand_ParseFail; 5792 } 5793 } else { 5794 return MatchOperand_ParseFail; 5795 } 5796 5797 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5798 return MatchOperand_Success; 5799 } 5800 5801 bool AMDGPUOperand::isSendMsg() const { 5802 return isImmTy(ImmTySendMsg); 5803 } 5804 5805 //===----------------------------------------------------------------------===// 5806 // v_interp 5807 //===----------------------------------------------------------------------===// 5808 5809 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5810 StringRef Str; 5811 SMLoc S = getLoc(); 5812 5813 if (!parseId(Str)) 5814 return MatchOperand_NoMatch; 5815 5816 int Slot = StringSwitch<int>(Str) 5817 .Case("p10", 0) 5818 .Case("p20", 1) 5819 .Case("p0", 2) 5820 .Default(-1); 5821 5822 if (Slot == -1) { 5823 Error(S, "invalid interpolation slot"); 5824 return MatchOperand_ParseFail; 5825 } 5826 5827 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5828 AMDGPUOperand::ImmTyInterpSlot)); 5829
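  // A successful parse here corresponds to syntax such as
  //   v_interp_mov_f32 v5, p10, attr0.x
  // (illustrative example, assumed rather than taken from this file).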
return MatchOperand_Success; 5830 } 5831 5832 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5833 StringRef Str; 5834 SMLoc S = getLoc(); 5835 5836 if (!parseId(Str)) 5837 return MatchOperand_NoMatch; 5838 5839 if (!Str.startswith("attr")) { 5840 Error(S, "invalid interpolation attribute"); 5841 return MatchOperand_ParseFail; 5842 } 5843 5844 StringRef Chan = Str.take_back(2); 5845 int AttrChan = StringSwitch<int>(Chan) 5846 .Case(".x", 0) 5847 .Case(".y", 1) 5848 .Case(".z", 2) 5849 .Case(".w", 3) 5850 .Default(-1); 5851 if (AttrChan == -1) { 5852 Error(S, "invalid or missing interpolation attribute channel"); 5853 return MatchOperand_ParseFail; 5854 } 5855 5856 Str = Str.drop_back(2).drop_front(4); 5857 5858 uint8_t Attr; 5859 if (Str.getAsInteger(10, Attr)) { 5860 Error(S, "invalid or missing interpolation attribute number"); 5861 return MatchOperand_ParseFail; 5862 } 5863 5864 if (Attr > 63) { 5865 Error(S, "out of bounds interpolation attribute number"); 5866 return MatchOperand_ParseFail; 5867 } 5868 5869 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5870 5871 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5872 AMDGPUOperand::ImmTyInterpAttr)); 5873 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5874 AMDGPUOperand::ImmTyAttrChan)); 5875 return MatchOperand_Success; 5876 } 5877 5878 //===----------------------------------------------------------------------===// 5879 // exp 5880 //===----------------------------------------------------------------------===// 5881 5882 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5883 uint8_t &Val) { 5884 if (Str == "null") { 5885 Val = Exp::ET_NULL; 5886 return MatchOperand_Success; 5887 } 5888 5889 if (Str.startswith("mrt")) { 5890 Str = Str.drop_front(3); 5891 if (Str == "z") { // == mrtz 5892 Val = Exp::ET_MRTZ; 5893 return MatchOperand_Success; 5894 } 5895 5896 if (Str.getAsInteger(10, Val)) 5897 return MatchOperand_ParseFail; 5898 5899 if (Val > Exp::ET_MRT7) 5900 return MatchOperand_ParseFail; 5901 5902 return MatchOperand_Success; 5903 } 5904 5905 if (Str.startswith("pos")) { 5906 Str = Str.drop_front(3); 5907 if (Str.getAsInteger(10, Val)) 5908 return MatchOperand_ParseFail; 5909 5910 if (Val > (isGFX10Plus() ? 
4 : 3)) 5911 return MatchOperand_ParseFail; 5912 5913 Val += Exp::ET_POS0; 5914 return MatchOperand_Success; 5915 } 5916 5917 if (isGFX10Plus() && Str == "prim") { 5918 Val = Exp::ET_PRIM; 5919 return MatchOperand_Success; 5920 } 5921 5922 if (Str.startswith("param")) { 5923 Str = Str.drop_front(5); 5924 if (Str.getAsInteger(10, Val)) 5925 return MatchOperand_ParseFail; 5926 5927 if (Val >= 32) 5928 return MatchOperand_ParseFail; 5929 5930 Val += Exp::ET_PARAM0; 5931 return MatchOperand_Success; 5932 } 5933 5934 return MatchOperand_ParseFail; 5935 } 5936 5937 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5938 StringRef Str; 5939 SMLoc S = getLoc(); 5940 5941 if (!parseId(Str)) 5942 return MatchOperand_NoMatch; 5943 5944 uint8_t Val; 5945 auto Res = parseExpTgtImpl(Str, Val); 5946 if (Res != MatchOperand_Success) { 5947 Error(S, "invalid exp target"); 5948 return Res; 5949 } 5950 5951 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5952 AMDGPUOperand::ImmTyExpTgt)); 5953 return MatchOperand_Success; 5954 } 5955 5956 //===----------------------------------------------------------------------===// 5957 // parser helpers 5958 //===----------------------------------------------------------------------===// 5959 5960 bool 5961 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5962 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5963 } 5964 5965 bool 5966 AMDGPUAsmParser::isId(const StringRef Id) const { 5967 return isId(getToken(), Id); 5968 } 5969 5970 bool 5971 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5972 return getTokenKind() == Kind; 5973 } 5974 5975 bool 5976 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5977 if (isId(Id)) { 5978 lex(); 5979 return true; 5980 } 5981 return false; 5982 } 5983 5984 bool 5985 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5986 if (isId(Id) && peekToken().is(Kind)) { 5987 lex(); 5988 lex(); 5989 return true; 5990 } 5991 return false; 5992 } 5993 5994 bool 5995 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5996 if (isToken(Kind)) { 5997 lex(); 5998 return true; 5999 } 6000 return false; 6001 } 6002 6003 bool 6004 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6005 const StringRef ErrMsg) { 6006 if (!trySkipToken(Kind)) { 6007 Error(getLoc(), ErrMsg); 6008 return false; 6009 } 6010 return true; 6011 } 6012 6013 bool 6014 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6015 SMLoc S = getLoc(); 6016 6017 const MCExpr *Expr; 6018 if (Parser.parseExpression(Expr)) 6019 return false; 6020 6021 if (Expr->evaluateAsAbsolute(Imm)) 6022 return true; 6023 6024 if (Expected.empty()) { 6025 Error(S, "expected absolute expression"); 6026 } else { 6027 Error(S, Twine("expected ", Expected) + 6028 Twine(" or an absolute expression")); 6029 } 6030 return false; 6031 } 6032 6033 bool 6034 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6035 SMLoc S = getLoc(); 6036 6037 const MCExpr *Expr; 6038 if (Parser.parseExpression(Expr)) 6039 return false; 6040 6041 int64_t IntVal; 6042 if (Expr->evaluateAsAbsolute(IntVal)) { 6043 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6044 } else { 6045 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6046 } 6047 return true; 6048 } 6049 6050 bool 6051 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6052 if (isToken(AsmToken::String)) { 6053 Val = getToken().getStringContents(); 6054 lex(); 6055 return 
true; 6056 } else { 6057 Error(getLoc(), ErrMsg); 6058 return false; 6059 } 6060 } 6061 6062 bool 6063 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6064 if (isToken(AsmToken::Identifier)) { 6065 Val = getTokenStr(); 6066 lex(); 6067 return true; 6068 } else { 6069 if (!ErrMsg.empty()) 6070 Error(getLoc(), ErrMsg); 6071 return false; 6072 } 6073 } 6074 6075 AsmToken 6076 AMDGPUAsmParser::getToken() const { 6077 return Parser.getTok(); 6078 } 6079 6080 AsmToken 6081 AMDGPUAsmParser::peekToken() { 6082 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6083 } 6084 6085 void 6086 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6087 auto TokCount = getLexer().peekTokens(Tokens); 6088 6089 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6090 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6091 } 6092 6093 AsmToken::TokenKind 6094 AMDGPUAsmParser::getTokenKind() const { 6095 return getLexer().getKind(); 6096 } 6097 6098 SMLoc 6099 AMDGPUAsmParser::getLoc() const { 6100 return getToken().getLoc(); 6101 } 6102 6103 StringRef 6104 AMDGPUAsmParser::getTokenStr() const { 6105 return getToken().getString(); 6106 } 6107 6108 void 6109 AMDGPUAsmParser::lex() { 6110 Parser.Lex(); 6111 } 6112 6113 SMLoc 6114 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6115 const OperandVector &Operands) const { 6116 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6117 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6118 if (Test(Op)) 6119 return Op.getStartLoc(); 6120 } 6121 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6122 } 6123 6124 SMLoc 6125 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6126 const OperandVector &Operands) const { 6127 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6128 return getOperandLoc(Test, Operands); 6129 } 6130 6131 SMLoc 6132 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6133 const OperandVector &Operands) const { 6134 auto Test = [=](const AMDGPUOperand& Op) { 6135 return Op.isRegKind() && Op.getReg() == Reg; 6136 }; 6137 return getOperandLoc(Test, Operands); 6138 } 6139 6140 SMLoc 6141 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6142 auto Test = [](const AMDGPUOperand& Op) { 6143 return Op.IsImmKindLiteral() || Op.isExpr(); 6144 }; 6145 return getOperandLoc(Test, Operands); 6146 } 6147 6148 SMLoc 6149 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6150 auto Test = [](const AMDGPUOperand& Op) { 6151 return Op.isImmKindConst(); 6152 }; 6153 return getOperandLoc(Test, Operands); 6154 } 6155 6156 //===----------------------------------------------------------------------===// 6157 // swizzle 6158 //===----------------------------------------------------------------------===// 6159 6160 LLVM_READNONE 6161 static unsigned 6162 encodeBitmaskPerm(const unsigned AndMask, 6163 const unsigned OrMask, 6164 const unsigned XorMask) { 6165 using namespace llvm::AMDGPU::Swizzle; 6166 6167 return BITMASK_PERM_ENC | 6168 (AndMask << BITMASK_AND_SHIFT) | 6169 (OrMask << BITMASK_OR_SHIFT) | 6170 (XorMask << BITMASK_XOR_SHIFT); 6171 } 6172 6173 bool 6174 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6175 const unsigned MinVal, 6176 const unsigned MaxVal, 6177 const StringRef ErrMsg, 6178 SMLoc &Loc) { 6179 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6180 return false; 6181 } 6182 Loc = getLoc(); 6183 if (!parseExpr(Op)) { 6184 return false; 6185 } 6186 if (Op < MinVal || Op > MaxVal) { 6187 Error(Loc, 
ErrMsg); 6188 return false; 6189 } 6190 6191 return true; 6192 } 6193 6194 bool 6195 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6196 const unsigned MinVal, 6197 const unsigned MaxVal, 6198 const StringRef ErrMsg) { 6199 SMLoc Loc; 6200 for (unsigned i = 0; i < OpNum; ++i) { 6201 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6202 return false; 6203 } 6204 6205 return true; 6206 } 6207 6208 bool 6209 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6210 using namespace llvm::AMDGPU::Swizzle; 6211 6212 int64_t Lane[LANE_NUM]; 6213 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6214 "expected a 2-bit lane id")) { 6215 Imm = QUAD_PERM_ENC; 6216 for (unsigned I = 0; I < LANE_NUM; ++I) { 6217 Imm |= Lane[I] << (LANE_SHIFT * I); 6218 } 6219 return true; 6220 } 6221 return false; 6222 } 6223 6224 bool 6225 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6226 using namespace llvm::AMDGPU::Swizzle; 6227 6228 SMLoc Loc; 6229 int64_t GroupSize; 6230 int64_t LaneIdx; 6231 6232 if (!parseSwizzleOperand(GroupSize, 6233 2, 32, 6234 "group size must be in the interval [2,32]", 6235 Loc)) { 6236 return false; 6237 } 6238 if (!isPowerOf2_64(GroupSize)) { 6239 Error(Loc, "group size must be a power of two"); 6240 return false; 6241 } 6242 if (parseSwizzleOperand(LaneIdx, 6243 0, GroupSize - 1, 6244 "lane id must be in the interval [0,group size - 1]", 6245 Loc)) { 6246 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6247 return true; 6248 } 6249 return false; 6250 } 6251 6252 bool 6253 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6254 using namespace llvm::AMDGPU::Swizzle; 6255 6256 SMLoc Loc; 6257 int64_t GroupSize; 6258 6259 if (!parseSwizzleOperand(GroupSize, 6260 2, 32, 6261 "group size must be in the interval [2,32]", 6262 Loc)) { 6263 return false; 6264 } 6265 if (!isPowerOf2_64(GroupSize)) { 6266 Error(Loc, "group size must be a power of two"); 6267 return false; 6268 } 6269 6270 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6271 return true; 6272 } 6273 6274 bool 6275 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6276 using namespace llvm::AMDGPU::Swizzle; 6277 6278 SMLoc Loc; 6279 int64_t GroupSize; 6280 6281 if (!parseSwizzleOperand(GroupSize, 6282 1, 16, 6283 "group size must be in the interval [1,16]", 6284 Loc)) { 6285 return false; 6286 } 6287 if (!isPowerOf2_64(GroupSize)) { 6288 Error(Loc, "group size must be a power of two"); 6289 return false; 6290 } 6291 6292 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6293 return true; 6294 } 6295 6296 bool 6297 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6298 using namespace llvm::AMDGPU::Swizzle; 6299 6300 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6301 return false; 6302 } 6303 6304 StringRef Ctl; 6305 SMLoc StrLoc = getLoc(); 6306 if (!parseString(Ctl)) { 6307 return false; 6308 } 6309 if (Ctl.size() != BITMASK_WIDTH) { 6310 Error(StrLoc, "expected a 5-character mask"); 6311 return false; 6312 } 6313 6314 unsigned AndMask = 0; 6315 unsigned OrMask = 0; 6316 unsigned XorMask = 0; 6317 6318 for (size_t i = 0; i < Ctl.size(); ++i) { 6319 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6320 switch(Ctl[i]) { 6321 default: 6322 Error(StrLoc, "invalid mask"); 6323 return false; 6324 case '0': 6325 break; 6326 case '1': 6327 OrMask |= Mask; 6328 break; 6329 case 'p': 6330 AndMask |= Mask; 6331 break; 6332 case 'i': 6333 AndMask |= Mask; 6334 XorMask |= Mask; 6335 break; 6336 } 6337 } 6338 6339 Imm = encodeBitmaskPerm(AndMask, 
OrMask, XorMask); 6340 return true; 6341 } 6342 6343 bool 6344 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6345 6346 SMLoc OffsetLoc = getLoc(); 6347 6348 if (!parseExpr(Imm, "a swizzle macro")) { 6349 return false; 6350 } 6351 if (!isUInt<16>(Imm)) { 6352 Error(OffsetLoc, "expected a 16-bit offset"); 6353 return false; 6354 } 6355 return true; 6356 } 6357 6358 bool 6359 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6360 using namespace llvm::AMDGPU::Swizzle; 6361 6362 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6363 6364 SMLoc ModeLoc = getLoc(); 6365 bool Ok = false; 6366 6367 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6368 Ok = parseSwizzleQuadPerm(Imm); 6369 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6370 Ok = parseSwizzleBitmaskPerm(Imm); 6371 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6372 Ok = parseSwizzleBroadcast(Imm); 6373 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6374 Ok = parseSwizzleSwap(Imm); 6375 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6376 Ok = parseSwizzleReverse(Imm); 6377 } else { 6378 Error(ModeLoc, "expected a swizzle mode"); 6379 } 6380 6381 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6382 } 6383 6384 return false; 6385 } 6386 6387 OperandMatchResultTy 6388 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6389 SMLoc S = getLoc(); 6390 int64_t Imm = 0; 6391 6392 if (trySkipId("offset")) { 6393 6394 bool Ok = false; 6395 if (skipToken(AsmToken::Colon, "expected a colon")) { 6396 if (trySkipId("swizzle")) { 6397 Ok = parseSwizzleMacro(Imm); 6398 } else { 6399 Ok = parseSwizzleOffset(Imm); 6400 } 6401 } 6402 6403 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6404 6405 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6406 } else { 6407 // Swizzle "offset" operand is optional. 6408 // If it is omitted, try parsing other optional operands. 6409 return parseOptionalOpr(Operands); 6410 } 6411 } 6412 6413 bool 6414 AMDGPUOperand::isSwizzle() const { 6415 return isImmTy(ImmTySwizzle); 6416 } 6417 6418 //===----------------------------------------------------------------------===// 6419 // VGPR Index Mode 6420 //===----------------------------------------------------------------------===// 6421 6422 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6423 6424 using namespace llvm::AMDGPU::VGPRIndexMode; 6425 6426 if (trySkipToken(AsmToken::RParen)) { 6427 return OFF; 6428 } 6429 6430 int64_t Imm = 0; 6431 6432 while (true) { 6433 unsigned Mode = 0; 6434 SMLoc S = getLoc(); 6435 6436 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6437 if (trySkipId(IdSymbolic[ModeId])) { 6438 Mode = 1 << ModeId; 6439 break; 6440 } 6441 } 6442 6443 if (Mode == 0) { 6444 Error(S, (Imm == 0)?
6445 "expected a VGPR index mode or a closing parenthesis" : 6446 "expected a VGPR index mode"); 6447 return UNDEF; 6448 } 6449 6450 if (Imm & Mode) { 6451 Error(S, "duplicate VGPR index mode"); 6452 return UNDEF; 6453 } 6454 Imm |= Mode; 6455 6456 if (trySkipToken(AsmToken::RParen)) 6457 break; 6458 if (!skipToken(AsmToken::Comma, 6459 "expected a comma or a closing parenthesis")) 6460 return UNDEF; 6461 } 6462 6463 return Imm; 6464 } 6465 6466 OperandMatchResultTy 6467 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6468 6469 using namespace llvm::AMDGPU::VGPRIndexMode; 6470 6471 int64_t Imm = 0; 6472 SMLoc S = getLoc(); 6473 6474 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6475 Imm = parseGPRIdxMacro(); 6476 if (Imm == UNDEF) 6477 return MatchOperand_ParseFail; 6478 } else { 6479 if (getParser().parseAbsoluteExpression(Imm)) 6480 return MatchOperand_ParseFail; 6481 if (Imm < 0 || !isUInt<4>(Imm)) { 6482 Error(S, "invalid immediate: only 4-bit values are legal"); 6483 return MatchOperand_ParseFail; 6484 } 6485 } 6486 6487 Operands.push_back( 6488 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6489 return MatchOperand_Success; 6490 } 6491 6492 bool AMDGPUOperand::isGPRIdxMode() const { 6493 return isImmTy(ImmTyGprIdxMode); 6494 } 6495 6496 //===----------------------------------------------------------------------===// 6497 // sopp branch targets 6498 //===----------------------------------------------------------------------===// 6499 6500 OperandMatchResultTy 6501 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6502 6503 // Make sure we are not parsing something 6504 // that looks like a label or an expression but is not. 6505 // This will improve error messages. 6506 if (isRegister() || isModifier()) 6507 return MatchOperand_NoMatch; 6508 6509 if (!parseExpr(Operands)) 6510 return MatchOperand_ParseFail; 6511 6512 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6513 assert(Opr.isImm() || Opr.isExpr()); 6514 SMLoc Loc = Opr.getStartLoc(); 6515 6516 // Currently we do not support arbitrary expressions as branch targets. 6517 // Only labels and absolute expressions are accepted. 
6518 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6519 Error(Loc, "expected an absolute expression or a label"); 6520 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6521 Error(Loc, "expected a 16-bit signed jump offset"); 6522 } 6523 6524 return MatchOperand_Success; 6525 } 6526 6527 //===----------------------------------------------------------------------===// 6528 // Boolean holding registers 6529 //===----------------------------------------------------------------------===// 6530 6531 OperandMatchResultTy 6532 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6533 return parseReg(Operands); 6534 } 6535 6536 //===----------------------------------------------------------------------===// 6537 // mubuf 6538 //===----------------------------------------------------------------------===// 6539 6540 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6541 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6542 } 6543 6544 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6545 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6546 } 6547 6548 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6549 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6550 } 6551 6552 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6553 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6554 } 6555 6556 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6557 const OperandVector &Operands, 6558 bool IsAtomic, 6559 bool IsAtomicReturn, 6560 bool IsLds) { 6561 bool IsLdsOpcode = IsLds; 6562 bool HasLdsModifier = false; 6563 OptionalImmIndexMap OptionalIdx; 6564 assert(IsAtomicReturn ? IsAtomic : true); 6565 unsigned FirstOperandIdx = 1; 6566 6567 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6568 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6569 6570 // Add the register arguments 6571 if (Op.isReg()) { 6572 Op.addRegOperands(Inst, 1); 6573 // Insert a tied src for atomic return dst. 6574 // This cannot be postponed as subsequent calls to 6575 // addImmOperands rely on correct number of MC operands. 6576 if (IsAtomicReturn && i == FirstOperandIdx) 6577 Op.addRegOperands(Inst, 1); 6578 continue; 6579 } 6580 6581 // Handle the case where soffset is an immediate 6582 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6583 Op.addImmOperands(Inst, 1); 6584 continue; 6585 } 6586 6587 HasLdsModifier |= Op.isLDS(); 6588 6589 // Handle tokens like 'offen' which are sometimes hard-coded into the 6590 // asm string. There are no MCInst operands for these. 6591 if (Op.isToken()) { 6592 continue; 6593 } 6594 assert(Op.isImm()); 6595 6596 // Handle optional arguments 6597 OptionalIdx[Op.getImmTy()] = i; 6598 } 6599 6600 // This is a workaround for an llvm quirk which may result in an 6601 // incorrect instruction selection. Lds and non-lds versions of 6602 // MUBUF instructions are identical except that lds versions 6603 // have mandatory 'lds' modifier. However this modifier follows 6604 // optional modifiers and llvm asm matcher regards this 'lds' 6605 // modifier as an optional one. As a result, an lds version 6606 // of opcode may be selected even if it has no 'lds' modifier. 6607 if (IsLdsOpcode && !HasLdsModifier) { 6608 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6609 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
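      // Assumed illustration: for "buffer_load_dword v1, off, s[4:7], s1 offset:4"
      // written without an 'lds' modifier, switch back to the non-lds opcode here.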
6610 Inst.setOpcode(NoLdsOpcode); 6611 IsLdsOpcode = false; 6612 } 6613 } 6614 6615 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6616 if (!IsAtomic || IsAtomicReturn) { 6617 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6618 IsAtomicReturn ? -1 : 0); 6619 } 6620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6621 6622 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6623 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6624 } 6625 6626 if (isGFX10Plus()) 6627 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6628 } 6629 6630 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6631 OptionalImmIndexMap OptionalIdx; 6632 6633 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6634 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6635 6636 // Add the register arguments 6637 if (Op.isReg()) { 6638 Op.addRegOperands(Inst, 1); 6639 continue; 6640 } 6641 6642 // Handle the case where soffset is an immediate 6643 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6644 Op.addImmOperands(Inst, 1); 6645 continue; 6646 } 6647 6648 // Handle tokens like 'offen' which are sometimes hard-coded into the 6649 // asm string. There are no MCInst operands for these. 6650 if (Op.isToken()) { 6651 continue; 6652 } 6653 assert(Op.isImm()); 6654 6655 // Handle optional arguments 6656 OptionalIdx[Op.getImmTy()] = i; 6657 } 6658 6659 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6660 AMDGPUOperand::ImmTyOffset); 6661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6665 6666 if (isGFX10Plus()) 6667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6668 } 6669 6670 //===----------------------------------------------------------------------===// 6671 // mimg 6672 //===----------------------------------------------------------------------===// 6673 6674 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6675 bool IsAtomic) { 6676 unsigned I = 1; 6677 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6678 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6679 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6680 } 6681 6682 if (IsAtomic) { 6683 // Add src, same as dst 6684 assert(Desc.getNumDefs() == 1); 6685 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6686 } 6687 6688 OptionalImmIndexMap OptionalIdx; 6689 6690 for (unsigned E = Operands.size(); I != E; ++I) { 6691 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6692 6693 // Add the register arguments 6694 if (Op.isReg()) { 6695 Op.addRegOperands(Inst, 1); 6696 } else if (Op.isImmModifier()) { 6697 OptionalIdx[Op.getImmTy()] = I; 6698 } else if (!Op.isToken()) { 6699 llvm_unreachable("unexpected operand type"); 6700 } 6701 } 6702 6703 bool IsGFX10Plus = isGFX10Plus(); 6704 6705 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6706 if (IsGFX10Plus) 6707 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6709 if (IsGFX10Plus) 6710 addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6711 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6712 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6713 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6714 if (IsGFX10Plus) 6715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6716 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6718 if (!IsGFX10Plus) 6719 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6720 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6721 } 6722 6723 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6724 cvtMIMG(Inst, Operands, true); 6725 } 6726 6727 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6728 const OperandVector &Operands) { 6729 for (unsigned I = 1; I < Operands.size(); ++I) { 6730 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6731 if (Operand.isReg()) 6732 Operand.addRegOperands(Inst, 1); 6733 } 6734 6735 Inst.addOperand(MCOperand::createImm(1)); // a16 6736 } 6737 6738 //===----------------------------------------------------------------------===// 6739 // smrd 6740 //===----------------------------------------------------------------------===// 6741 6742 bool AMDGPUOperand::isSMRDOffset8() const { 6743 return isImm() && isUInt<8>(getImm()); 6744 } 6745 6746 bool AMDGPUOperand::isSMEMOffset() const { 6747 return isImm(); // Offset range is checked later by validator. 6748 } 6749 6750 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6751 // 32-bit literals are only supported on CI and we only want to use them 6752 // when the offset is > 8-bits. 6753 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6754 } 6755 6756 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6757 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6758 } 6759 6760 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6761 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6762 } 6763 6764 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6765 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6766 } 6767 6768 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6769 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6770 } 6771 6772 //===----------------------------------------------------------------------===// 6773 // vop3 6774 //===----------------------------------------------------------------------===// 6775 6776 static bool ConvertOmodMul(int64_t &Mul) { 6777 if (Mul != 1 && Mul != 2 && Mul != 4) 6778 return false; 6779 6780 Mul >>= 1; 6781 return true; 6782 } 6783 6784 static bool ConvertOmodDiv(int64_t &Div) { 6785 if (Div == 1) { 6786 Div = 0; 6787 return true; 6788 } 6789 6790 if (Div == 2) { 6791 Div = 3; 6792 return true; 6793 } 6794 6795 return false; 6796 } 6797 6798 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6799 if (BoundCtrl == 0) { 6800 BoundCtrl = 1; 6801 return true; 6802 } 6803 6804 if (BoundCtrl == -1) { 6805 BoundCtrl = 0; 6806 return true; 6807 } 6808 6809 return false; 6810 } 6811 6812 // Note: the order in this table matches the order of operands in AsmString. 
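// Assumed illustration: for "buffer_store_dword v1, off, s[4:7], s1 offset:16 glc slc"
// the trailing 'offset', 'glc' and 'slc' modifiers are matched through this table.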
6813 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6814 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6815 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6816 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6817 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6818 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6819 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6820 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6821 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6822 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6823 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6824 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6825 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6826 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6827 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6828 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6829 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6830 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6831 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6832 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6833 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6834 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6835 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6836 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6837 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6838 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6839 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6840 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6841 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6842 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6843 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6844 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6845 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6846 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6847 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6848 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6849 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6850 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6851 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6852 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6853 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6854 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6855 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6856 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6857 }; 6858 6859 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6860 6861 OperandMatchResultTy res = parseOptionalOpr(Operands); 6862 6863 // This is a hack to enable hardcoded mandatory operands which follow 6864 // optional operands. 6865 // 6866 // Current design assumes that all operands after the first optional operand 6867 // are also optional. However implementation of some instructions violates 6868 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6869 // 6870 // To alleviate this problem, we have to (implicitly) parse extra operands 6871 // to make sure autogenerated parser of custom operands never hit hardcoded 6872 // mandatory operands. 
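  // Assumed example of such an instruction: "global_atomic_add v0, v[1:2], v2, off glc"
  // ends with a hardcoded 'glc', so extra operands are parsed below to keep the
  // autogenerated matcher from reaching it.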
6873 6874 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6875 if (res != MatchOperand_Success || 6876 isToken(AsmToken::EndOfStatement)) 6877 break; 6878 6879 trySkipToken(AsmToken::Comma); 6880 res = parseOptionalOpr(Operands); 6881 } 6882 6883 return res; 6884 } 6885 6886 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6887 OperandMatchResultTy res; 6888 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6889 // try to parse any optional operand here 6890 if (Op.IsBit) { 6891 res = parseNamedBit(Op.Name, Operands, Op.Type); 6892 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6893 res = parseOModOperand(Operands); 6894 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6895 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6896 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6897 res = parseSDWASel(Operands, Op.Name, Op.Type); 6898 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6899 res = parseSDWADstUnused(Operands); 6900 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6901 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6902 Op.Type == AMDGPUOperand::ImmTyNegLo || 6903 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6904 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6905 Op.ConvertResult); 6906 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6907 res = parseDim(Operands); 6908 } else { 6909 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6910 } 6911 if (res != MatchOperand_NoMatch) { 6912 return res; 6913 } 6914 } 6915 return MatchOperand_NoMatch; 6916 } 6917 6918 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6919 StringRef Name = getTokenStr(); 6920 if (Name == "mul") { 6921 return parseIntWithPrefix("mul", Operands, 6922 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6923 } 6924 6925 if (Name == "div") { 6926 return parseIntWithPrefix("div", Operands, 6927 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6928 } 6929 6930 return MatchOperand_NoMatch; 6931 } 6932 6933 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6934 cvtVOP3P(Inst, Operands); 6935 6936 int Opc = Inst.getOpcode(); 6937 6938 int SrcNum; 6939 const int Ops[] = { AMDGPU::OpName::src0, 6940 AMDGPU::OpName::src1, 6941 AMDGPU::OpName::src2 }; 6942 for (SrcNum = 0; 6943 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6944 ++SrcNum); 6945 assert(SrcNum > 0); 6946 6947 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6948 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6949 6950 if ((OpSel & (1 << SrcNum)) != 0) { 6951 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6952 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6953 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6954 } 6955 } 6956 6957 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6958 // 1. This operand is input modifiers 6959 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6960 // 2. This is not last operand 6961 && Desc.NumOperands > (OpNum + 1) 6962 // 3. Next operand is register class 6963 && Desc.OpInfo[OpNum + 1].RegClass != -1 6964 // 4. 
Next register is not tied to any other operand 6965 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6966 } 6967 6968 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6969 { 6970 OptionalImmIndexMap OptionalIdx; 6971 unsigned Opc = Inst.getOpcode(); 6972 6973 unsigned I = 1; 6974 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6975 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6976 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6977 } 6978 6979 for (unsigned E = Operands.size(); I != E; ++I) { 6980 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6981 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6982 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6983 } else if (Op.isInterpSlot() || 6984 Op.isInterpAttr() || 6985 Op.isAttrChan()) { 6986 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6987 } else if (Op.isImmModifier()) { 6988 OptionalIdx[Op.getImmTy()] = I; 6989 } else { 6990 llvm_unreachable("unhandled operand type"); 6991 } 6992 } 6993 6994 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6995 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6996 } 6997 6998 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6999 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7000 } 7001 7002 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7004 } 7005 } 7006 7007 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7008 OptionalImmIndexMap &OptionalIdx) { 7009 unsigned Opc = Inst.getOpcode(); 7010 7011 unsigned I = 1; 7012 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7013 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7014 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7015 } 7016 7017 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7018 // This instruction has src modifiers 7019 for (unsigned E = Operands.size(); I != E; ++I) { 7020 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7021 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7022 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7023 } else if (Op.isImmModifier()) { 7024 OptionalIdx[Op.getImmTy()] = I; 7025 } else if (Op.isRegOrImm()) { 7026 Op.addRegOrImmOperands(Inst, 1); 7027 } else { 7028 llvm_unreachable("unhandled operand type"); 7029 } 7030 } 7031 } else { 7032 // No src modifiers 7033 for (unsigned E = Operands.size(); I != E; ++I) { 7034 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7035 if (Op.isMod()) { 7036 OptionalIdx[Op.getImmTy()] = I; 7037 } else { 7038 Op.addRegOrImmOperands(Inst, 1); 7039 } 7040 } 7041 } 7042 7043 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7045 } 7046 7047 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7049 } 7050 7051 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7052 // it has src2 register operand that is tied to dst operand 7053 // we don't allow modifiers for this operand in assembler so src2_modifiers 7054 // should be 0. 
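  // Assumed illustration: for "v_mac_f32_e64 v1, v2, v3" the tied src2 is the dst (v1);
  // the code below inserts a zero src2_modifiers operand and then a copy of the dst
  // operand as src2.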
7055 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7056 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7057 Opc == AMDGPU::V_MAC_F32_e64_vi || 7058 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7059 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7060 Opc == AMDGPU::V_MAC_F16_e64_vi || 7061 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7062 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7063 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7064 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7065 auto it = Inst.begin(); 7066 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7067 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7068 ++it; 7069 // Copy the operand to ensure it's not invalidated when Inst grows. 7070 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7071 } 7072 } 7073 7074 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7075 OptionalImmIndexMap OptionalIdx; 7076 cvtVOP3(Inst, Operands, OptionalIdx); 7077 } 7078 7079 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7080 const OperandVector &Operands) { 7081 OptionalImmIndexMap OptIdx; 7082 const int Opc = Inst.getOpcode(); 7083 const MCInstrDesc &Desc = MII.get(Opc); 7084 7085 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7086 7087 cvtVOP3(Inst, Operands, OptIdx); 7088 7089 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7090 assert(!IsPacked); 7091 Inst.addOperand(Inst.getOperand(0)); 7092 } 7093 7094 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7095 // instruction, and then figure out where to actually put the modifiers 7096 7097 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7098 7099 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7100 if (OpSelHiIdx != -1) { 7101 int DefaultVal = IsPacked ? 
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (!isToken(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getTokenStr();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1})
    .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1})
    .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1})
    .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1})
    .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15})
    .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15})
    .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
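  // Illustrative forms handled by this conversion (examples only):
  //   v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
  //   v_mov_b32_dpp v0, v1 dpp8:[7,6,5,4,3,2,1,0] fi:1   (the IsDPP8 form)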
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
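  // Illustrative SDWA syntax using the modifiers parsed here (example only):
  //   v_mov_b32_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PRESERVE src0_sel:WORD_1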
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
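      // Put differently: with only the vdst recorded, Inst.getNumOperands()
      // is 1 and the vcc seen here is the dst-side carry; once src0 and src1
      // have been added with their modifiers (1 + 2 + 2 operands), it is the
      // src-side carry tested below.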
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // V_NOP_sdwa on vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
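// Illustrative syntax for the optional endpgm immediate parsed above:
//   s_endpgm     ; immediate omitted, defaults to 0
//   s_endpgm 1   ; explicit 16-bit immediate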