1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
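    //
    // For example (hypothetical input), in "ds_write_b32 v1, v2 gds" the
    // trailing "gds" may first come back from the expression parser as an
    // MCSymbolRefExpr; reporting it as a token here lets the matcher see the
    // symbol name "gds" as the modifier it expects.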
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
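    // Note: the token operand created below keeps a pointer into Str rather
    // than a copy, so the referenced string must outlive the operand. A minimal
    // (hypothetical) call-site sketch:
    //   Operands.push_back(
    //       AMDGPUOperand::CreateToken(this, Tok.getString(), Tok.getLoc()));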
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
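  /// For example (the directive names here are only an illustration of the
  /// expected shape), a block such as
  ///   .amdgpu_metadata
  ///     <yaml text>
  ///   .end_amdgpu_metadata
  /// would have everything between the two directives accumulated into
  /// \p CollectString for later processing.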
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
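      //
      // As an illustration (the concrete values are an assumption for a
      // hypothetical gfx900 target under a V3+ HSA ABI), the block below would
      // end up defining
      //   .amdgcn.gfx_generation_number   = 9
      //   .amdgcn.gfx_generation_minor    = 0
      //   .amdgcn.gfx_generation_stepping = 0
      // which assembly source can then inspect, e.g. with .if directives.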
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1333 1334 bool isSI() const { 1335 return AMDGPU::isSI(getSTI()); 1336 } 1337 1338 bool isCI() const { 1339 return AMDGPU::isCI(getSTI()); 1340 } 1341 1342 bool isVI() const { 1343 return AMDGPU::isVI(getSTI()); 1344 } 1345 1346 bool isGFX9() const { 1347 return AMDGPU::isGFX9(getSTI()); 1348 } 1349 1350 bool isGFX90A() const { 1351 return AMDGPU::isGFX90A(getSTI()); 1352 } 1353 1354 bool isGFX9Plus() const { 1355 return AMDGPU::isGFX9Plus(getSTI()); 1356 } 1357 1358 bool isGFX10() const { 1359 return AMDGPU::isGFX10(getSTI()); 1360 } 1361 1362 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1363 1364 bool isGFX10_BEncoding() const { 1365 return AMDGPU::isGFX10_BEncoding(getSTI()); 1366 } 1367 1368 bool hasInv2PiInlineImm() const { 1369 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1370 } 1371 1372 bool hasFlatOffsets() const { 1373 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1374 } 1375 1376 bool hasArchitectedFlatScratch() const { 1377 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1378 } 1379 1380 bool hasSGPR102_SGPR103() const { 1381 return !isVI() && !isGFX9(); 1382 } 1383 1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1385 1386 bool hasIntClamp() const { 1387 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1388 } 1389 1390 AMDGPUTargetStreamer &getTargetStreamer() { 1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1392 return static_cast<AMDGPUTargetStreamer &>(TS); 1393 } 1394 1395 const MCRegisterInfo *getMRI() const { 1396 // We need this const_cast because for some reason getContext() is not const 1397 // in MCAsmParser. 
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1399 } 1400 1401 const MCInstrInfo *getMII() const { 1402 return &MII; 1403 } 1404 1405 const FeatureBitset &getFeatureBits() const { 1406 return getSTI().getFeatureBits(); 1407 } 1408 1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1412 1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1415 bool isForcedDPP() const { return ForcedDPP; } 1416 bool isForcedSDWA() const { return ForcedSDWA; } 1417 ArrayRef<unsigned> getMatchedVariants() const; 1418 StringRef getMatchedVariantName() const; 1419 1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1422 bool RestoreOnFailure); 1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1425 SMLoc &EndLoc) override; 1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1428 unsigned Kind) override; 1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1430 OperandVector &Operands, MCStreamer &Out, 1431 uint64_t &ErrorInfo, 1432 bool MatchingInlineAsm) override; 1433 bool ParseDirective(AsmToken DirectiveID) override; 1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1435 OperandMode Mode = OperandMode_Default); 1436 StringRef parseMnemonicSuffix(StringRef Name); 1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1438 SMLoc NameLoc, OperandVector &Operands) override; 1439 //bool ProcessInstruction(MCInst &Inst); 1440 1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1442 1443 OperandMatchResultTy 1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t &) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseOperandArrayWithPrefix(const char *Prefix, 1450 OperandVector &Operands, 1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1452 bool (*ConvertResult)(int64_t&) = nullptr); 1453 1454 OperandMatchResultTy 1455 parseNamedBit(StringRef Name, OperandVector &Operands, 1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1457 OperandMatchResultTy parseCPol(OperandVector &Operands); 1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1459 StringRef &Value, 1460 SMLoc &StringLoc); 1461 1462 bool isModifier(); 1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1467 bool parseSP3NegModifier(); 1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1469 OperandMatchResultTy parseReg(OperandVector &Operands); 1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1477 OperandMatchResultTy parseUfmt(int64_t &Format); 1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1482 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1485 1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1489 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1490 1491 bool parseCnt(int64_t &IntVal); 1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1493 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1494 1495 private: 1496 struct OperandInfoTy { 1497 SMLoc Loc; 1498 int64_t Id; 1499 bool IsSymbolic = false; 1500 bool IsDefined = false; 1501 1502 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1503 }; 1504 1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1506 bool validateSendMsg(const OperandInfoTy &Msg, 1507 const OperandInfoTy &Op, 1508 const OperandInfoTy &Stream); 1509 1510 bool parseHwregBody(OperandInfoTy &HwReg, 1511 OperandInfoTy &Offset, 1512 OperandInfoTy &Width); 1513 bool validateHwreg(const OperandInfoTy &HwReg, 1514 const OperandInfoTy &Offset, 1515 const OperandInfoTy &Width); 1516 1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1519 1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1521 const OperandVector &Operands) const; 1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1524 SMLoc getLitLoc(const OperandVector &Operands) const; 1525 SMLoc getConstLoc(const OperandVector &Operands) const; 1526 1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1530 bool validateSOPLiteral(const MCInst &Inst) const; 1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateIntClampSupported(const MCInst &Inst); 1534 bool validateMIMGAtomicDMask(const MCInst &Inst); 1535 bool validateMIMGGatherDMask(const MCInst &Inst); 1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateMIMGDataSize(const MCInst &Inst); 1538 bool validateMIMGAddrSize(const 
MCInst &Inst); 1539 bool validateMIMGD16(const MCInst &Inst); 1540 bool validateMIMGDim(const MCInst &Inst); 1541 bool validateMIMGMSAA(const MCInst &Inst); 1542 bool validateOpSel(const MCInst &Inst); 1543 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1544 bool validateVccOperand(unsigned Reg) const; 1545 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1546 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1547 bool validateAGPRLdSt(const MCInst &Inst) const; 1548 bool validateVGPRAlign(const MCInst &Inst) const; 1549 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateDivScale(const MCInst &Inst); 1551 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1552 const SMLoc &IDLoc); 1553 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1554 unsigned getConstantBusLimit(unsigned Opcode) const; 1555 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1556 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1557 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1558 1559 bool isSupportedMnemo(StringRef Mnemo, 1560 const FeatureBitset &FBS); 1561 bool isSupportedMnemo(StringRef Mnemo, 1562 const FeatureBitset &FBS, 1563 ArrayRef<unsigned> Variants); 1564 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1565 1566 bool isId(const StringRef Id) const; 1567 bool isId(const AsmToken &Token, const StringRef Id) const; 1568 bool isToken(const AsmToken::TokenKind Kind) const; 1569 bool trySkipId(const StringRef Id); 1570 bool trySkipId(const StringRef Pref, const StringRef Id); 1571 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1572 bool trySkipToken(const AsmToken::TokenKind Kind); 1573 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1574 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1575 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1576 1577 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1578 AsmToken::TokenKind getTokenKind() const; 1579 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1580 bool parseExpr(OperandVector &Operands); 1581 StringRef getTokenStr() const; 1582 AsmToken peekToken(); 1583 AsmToken getToken() const; 1584 SMLoc getLoc() const; 1585 void lex(); 1586 1587 public: 1588 void onBeginOfFile() override; 1589 1590 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1591 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1592 1593 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1594 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1595 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1596 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1597 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1598 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1599 1600 bool parseSwizzleOperand(int64_t &Op, 1601 const unsigned MinVal, 1602 const unsigned MaxVal, 1603 const StringRef ErrMsg, 1604 SMLoc &Loc); 1605 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1606 const unsigned MinVal, 1607 const unsigned MaxVal, 1608 const StringRef ErrMsg); 1609 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1610 bool parseSwizzleOffset(int64_t &Imm); 1611 bool parseSwizzleMacro(int64_t &Imm); 1612 bool parseSwizzleQuadPerm(int64_t &Imm); 1613 bool parseSwizzleBitmaskPerm(int64_t 
&Imm); 1614 bool parseSwizzleBroadcast(int64_t &Imm); 1615 bool parseSwizzleSwap(int64_t &Imm); 1616 bool parseSwizzleReverse(int64_t &Imm); 1617 1618 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1619 int64_t parseGPRIdxMacro(); 1620 1621 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1622 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1623 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1624 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1625 1626 AMDGPUOperand::Ptr defaultCPol() const; 1627 1628 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1629 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1630 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1631 AMDGPUOperand::Ptr defaultFlatOffset() const; 1632 1633 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1634 1635 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1636 OptionalImmIndexMap &OptionalIdx); 1637 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1638 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1639 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1640 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1641 OptionalImmIndexMap &OptionalIdx); 1642 1643 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1644 1645 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1646 bool IsAtomic = false); 1647 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1648 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1649 1650 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1651 1652 bool parseDimId(unsigned &Encoding); 1653 OperandMatchResultTy parseDim(OperandVector &Operands); 1654 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1655 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1656 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1657 int64_t parseDPPCtrlSel(StringRef Ctrl); 1658 int64_t parseDPPCtrlPerm(); 1659 AMDGPUOperand::Ptr defaultRowMask() const; 1660 AMDGPUOperand::Ptr defaultBankMask() const; 1661 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1662 AMDGPUOperand::Ptr defaultFI() const; 1663 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1664 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1665 1666 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1667 AMDGPUOperand::ImmTy Type); 1668 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1669 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1670 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1671 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1672 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1673 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1675 uint64_t BasicInstType, 1676 bool SkipDstVcc = false, 1677 bool SkipSrcVcc = false); 1678 1679 AMDGPUOperand::Ptr defaultBLGP() const; 1680 AMDGPUOperand::Ptr defaultCBSZ() const; 1681 AMDGPUOperand::Ptr defaultABID() const; 1682 1683 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1684 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1685 }; 1686 1687 struct OptionalOperand { 1688 
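  // Describes one optional/named operand (e.g. an offset or a bit flag) that
  // may follow the mandatory operands. A hypothetical table entry could look
  // like
  //   {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}
  // i.e. a bit-valued operand named "clamp" with no custom value conversion.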
const char *Name; 1689 AMDGPUOperand::ImmTy Type; 1690 bool IsBit; 1691 bool (*ConvertResult)(int64_t&); 1692 }; 1693 1694 } // end anonymous namespace 1695 1696 // May be called with integer type with equivalent bitwidth. 1697 static const fltSemantics *getFltSemantics(unsigned Size) { 1698 switch (Size) { 1699 case 4: 1700 return &APFloat::IEEEsingle(); 1701 case 8: 1702 return &APFloat::IEEEdouble(); 1703 case 2: 1704 return &APFloat::IEEEhalf(); 1705 default: 1706 llvm_unreachable("unsupported fp type"); 1707 } 1708 } 1709 1710 static const fltSemantics *getFltSemantics(MVT VT) { 1711 return getFltSemantics(VT.getSizeInBits() / 8); 1712 } 1713 1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1715 switch (OperandType) { 1716 case AMDGPU::OPERAND_REG_IMM_INT32: 1717 case AMDGPU::OPERAND_REG_IMM_FP32: 1718 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1719 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1720 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1722 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1723 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1725 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1726 return &APFloat::IEEEsingle(); 1727 case AMDGPU::OPERAND_REG_IMM_INT64: 1728 case AMDGPU::OPERAND_REG_IMM_FP64: 1729 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1730 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1732 return &APFloat::IEEEdouble(); 1733 case AMDGPU::OPERAND_REG_IMM_INT16: 1734 case AMDGPU::OPERAND_REG_IMM_FP16: 1735 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1736 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1737 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1738 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1739 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1740 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1741 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1742 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1743 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1744 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1745 return &APFloat::IEEEhalf(); 1746 default: 1747 llvm_unreachable("unsupported fp type"); 1748 } 1749 } 1750 1751 //===----------------------------------------------------------------------===// 1752 // Operand 1753 //===----------------------------------------------------------------------===// 1754 1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1756 bool Lost; 1757 1758 // Convert literal to single precision 1759 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1760 APFloat::rmNearestTiesToEven, 1761 &Lost); 1762 // We allow precision lost but not overflow or underflow 1763 if (Status != APFloat::opOK && 1764 Lost && 1765 ((Status & APFloat::opOverflow) != 0 || 1766 (Status & APFloat::opUnderflow) != 0)) { 1767 return false; 1768 } 1769 1770 return true; 1771 } 1772 1773 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1774 return isUIntN(Size, Val) || isIntN(Size, Val); 1775 } 1776 1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1778 if (VT.getScalarType() == MVT::i16) { 1779 // FP immediate values are broken. 1780 return isInlinableIntLiteral(Val); 1781 } 1782 1783 // f16/v2f16 operands work correctly for all values. 1784 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1785 } 1786 1787 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1788 1789 // This is a hack to enable named inline values like 1790 // shared_base with both 32-bit and 64-bit operands. 
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals while preserving the same
      // semantics for VOP1/2/C and VOP3 because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // Encoding such a literal sets its low 32 bits to zero,
    // but we accept these literals anyway.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)?
MVT::f32 : type; 1893 1894 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1895 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1896 } 1897 1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1899 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1900 } 1901 1902 bool AMDGPUOperand::isVRegWithInputMods() const { 1903 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1904 // GFX90A allows DPP on 64-bit operands. 1905 (isRegClass(AMDGPU::VReg_64RegClassID) && 1906 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1907 } 1908 1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1910 if (AsmParser->isVI()) 1911 return isVReg32(); 1912 else if (AsmParser->isGFX9Plus()) 1913 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1914 else 1915 return false; 1916 } 1917 1918 bool AMDGPUOperand::isSDWAFP16Operand() const { 1919 return isSDWAOperand(MVT::f16); 1920 } 1921 1922 bool AMDGPUOperand::isSDWAFP32Operand() const { 1923 return isSDWAOperand(MVT::f32); 1924 } 1925 1926 bool AMDGPUOperand::isSDWAInt16Operand() const { 1927 return isSDWAOperand(MVT::i16); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAInt32Operand() const { 1931 return isSDWAOperand(MVT::i32); 1932 } 1933 1934 bool AMDGPUOperand::isBoolReg() const { 1935 auto FB = AsmParser->getFeatureBits(); 1936 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1937 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1938 } 1939 1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1941 { 1942 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1943 assert(Size == 2 || Size == 4 || Size == 8); 1944 1945 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1946 1947 if (Imm.Mods.Abs) { 1948 Val &= ~FpSignMask; 1949 } 1950 if (Imm.Mods.Neg) { 1951 Val ^= FpSignMask; 1952 } 1953 1954 return Val; 1955 } 1956 1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1958 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1959 Inst.getNumOperands())) { 1960 addLiteralImmOperand(Inst, Imm.Val, 1961 ApplyModifiers & 1962 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1963 } else { 1964 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1965 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1966 setImmKindNone(); 1967 } 1968 } 1969 1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1971 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1972 auto OpNum = Inst.getNumOperands(); 1973 // Check that this operand accepts literals 1974 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1975 1976 if (ApplyModifiers) { 1977 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1978 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1979 Val = applyInputFPModifiers(Val, Size); 1980 } 1981 1982 APInt Literal(64, Val); 1983 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1984 1985 if (Imm.IsFPImm) { // We got fp literal token 1986 switch (OpTy) { 1987 case AMDGPU::OPERAND_REG_IMM_INT64: 1988 case AMDGPU::OPERAND_REG_IMM_FP64: 1989 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1990 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1991 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1992 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1993 AsmParser->hasInv2PiInlineImm())) { 1994 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1995 setImmKindConst(); 1996 return; 1997 } 1998 1999 // Non-inlineable 2000 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2001 // For fp operands we check if low 32 bits are zeros 2002 if (Literal.getLoBits(32) != 0) { 2003 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2004 "Can't encode literal as exact 64-bit floating-point operand. " 2005 "Low 32-bits will be set to zero"); 2006 } 2007 2008 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2009 setImmKindLiteral(); 2010 return; 2011 } 2012 2013 // We don't allow fp literals in 64-bit integer instructions. It is 2014 // unclear how we should encode them. This case should be checked earlier 2015 // in predicate methods (isLiteralImm()) 2016 llvm_unreachable("fp literal in 64-bit integer instruction."); 2017 2018 case AMDGPU::OPERAND_REG_IMM_INT32: 2019 case AMDGPU::OPERAND_REG_IMM_FP32: 2020 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2021 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2024 case AMDGPU::OPERAND_REG_IMM_INT16: 2025 case AMDGPU::OPERAND_REG_IMM_FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2027 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2028 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2029 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2030 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2031 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2033 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2034 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2035 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2036 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2037 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2038 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2039 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2040 bool lost; 2041 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2042 // Convert literal to single precision 2043 FPLiteral.convert(*getOpFltSemantics(OpTy), 2044 APFloat::rmNearestTiesToEven, &lost); 2045 // We allow precision lost but not overflow or underflow. This should be 2046 // checked earlier in isLiteralImm() 2047 2048 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2049 Inst.addOperand(MCOperand::createImm(ImmVal)); 2050 setImmKindLiteral(); 2051 return; 2052 } 2053 default: 2054 llvm_unreachable("invalid operand size"); 2055 } 2056 2057 return; 2058 } 2059 2060 // We got int literal token. 2061 // Only sign extend inline immediates. 
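  // Illustrative examples (assuming a 32-bit operand such as
  // OPERAND_REG_IMM_INT32):
  //   Val = -16        -> encoded as an inline constant
  //   Val = 99         -> encoded as the 32-bit literal 0x00000063
  //   Val = 0xFFFFFFFF -> truncates to -1 and is encoded as an inline constant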
2062 switch (OpTy) { 2063 case AMDGPU::OPERAND_REG_IMM_INT32: 2064 case AMDGPU::OPERAND_REG_IMM_FP32: 2065 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2070 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2071 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2072 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2073 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2074 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2075 if (isSafeTruncation(Val, 32) && 2076 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2077 AsmParser->hasInv2PiInlineImm())) { 2078 Inst.addOperand(MCOperand::createImm(Val)); 2079 setImmKindConst(); 2080 return; 2081 } 2082 2083 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2084 setImmKindLiteral(); 2085 return; 2086 2087 case AMDGPU::OPERAND_REG_IMM_INT64: 2088 case AMDGPU::OPERAND_REG_IMM_FP64: 2089 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2090 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2091 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2092 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2093 Inst.addOperand(MCOperand::createImm(Val)); 2094 setImmKindConst(); 2095 return; 2096 } 2097 2098 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2099 setImmKindLiteral(); 2100 return; 2101 2102 case AMDGPU::OPERAND_REG_IMM_INT16: 2103 case AMDGPU::OPERAND_REG_IMM_FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2105 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2106 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2107 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2108 if (isSafeTruncation(Val, 16) && 2109 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2110 AsmParser->hasInv2PiInlineImm())) { 2111 Inst.addOperand(MCOperand::createImm(Val)); 2112 setImmKindConst(); 2113 return; 2114 } 2115 2116 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2117 setImmKindLiteral(); 2118 return; 2119 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2124 assert(isSafeTruncation(Val, 16)); 2125 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2126 AsmParser->hasInv2PiInlineImm())); 2127 2128 Inst.addOperand(MCOperand::createImm(Val)); 2129 return; 2130 } 2131 default: 2132 llvm_unreachable("invalid operand size"); 2133 } 2134 } 2135 2136 template <unsigned Bitwidth> 2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2138 APInt Literal(64, Imm.Val); 2139 setImmKindNone(); 2140 2141 if (!Imm.IsFPImm) { 2142 // We got int literal token. 
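    // Illustrative note: an integer token is passed through unchanged here,
    // keeping only the low Bitwidth bits (e.g. 16 bits for a k-imm f16 operand).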
2143 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2144 return; 2145 } 2146 2147 bool Lost; 2148 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2149 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2150 APFloat::rmNearestTiesToEven, &Lost); 2151 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2152 } 2153 2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2155 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2156 } 2157 2158 static bool isInlineValue(unsigned Reg) { 2159 switch (Reg) { 2160 case AMDGPU::SRC_SHARED_BASE: 2161 case AMDGPU::SRC_SHARED_LIMIT: 2162 case AMDGPU::SRC_PRIVATE_BASE: 2163 case AMDGPU::SRC_PRIVATE_LIMIT: 2164 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2165 return true; 2166 case AMDGPU::SRC_VCCZ: 2167 case AMDGPU::SRC_EXECZ: 2168 case AMDGPU::SRC_SCC: 2169 return true; 2170 case AMDGPU::SGPR_NULL: 2171 return true; 2172 default: 2173 return false; 2174 } 2175 } 2176 2177 bool AMDGPUOperand::isInlineValue() const { 2178 return isRegKind() && ::isInlineValue(getReg()); 2179 } 2180 2181 //===----------------------------------------------------------------------===// 2182 // AsmParser 2183 //===----------------------------------------------------------------------===// 2184 2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2186 if (Is == IS_VGPR) { 2187 switch (RegWidth) { 2188 default: return -1; 2189 case 1: return AMDGPU::VGPR_32RegClassID; 2190 case 2: return AMDGPU::VReg_64RegClassID; 2191 case 3: return AMDGPU::VReg_96RegClassID; 2192 case 4: return AMDGPU::VReg_128RegClassID; 2193 case 5: return AMDGPU::VReg_160RegClassID; 2194 case 6: return AMDGPU::VReg_192RegClassID; 2195 case 7: return AMDGPU::VReg_224RegClassID; 2196 case 8: return AMDGPU::VReg_256RegClassID; 2197 case 16: return AMDGPU::VReg_512RegClassID; 2198 case 32: return AMDGPU::VReg_1024RegClassID; 2199 } 2200 } else if (Is == IS_TTMP) { 2201 switch (RegWidth) { 2202 default: return -1; 2203 case 1: return AMDGPU::TTMP_32RegClassID; 2204 case 2: return AMDGPU::TTMP_64RegClassID; 2205 case 4: return AMDGPU::TTMP_128RegClassID; 2206 case 8: return AMDGPU::TTMP_256RegClassID; 2207 case 16: return AMDGPU::TTMP_512RegClassID; 2208 } 2209 } else if (Is == IS_SGPR) { 2210 switch (RegWidth) { 2211 default: return -1; 2212 case 1: return AMDGPU::SGPR_32RegClassID; 2213 case 2: return AMDGPU::SGPR_64RegClassID; 2214 case 3: return AMDGPU::SGPR_96RegClassID; 2215 case 4: return AMDGPU::SGPR_128RegClassID; 2216 case 5: return AMDGPU::SGPR_160RegClassID; 2217 case 6: return AMDGPU::SGPR_192RegClassID; 2218 case 7: return AMDGPU::SGPR_224RegClassID; 2219 case 8: return AMDGPU::SGPR_256RegClassID; 2220 case 16: return AMDGPU::SGPR_512RegClassID; 2221 } 2222 } else if (Is == IS_AGPR) { 2223 switch (RegWidth) { 2224 default: return -1; 2225 case 1: return AMDGPU::AGPR_32RegClassID; 2226 case 2: return AMDGPU::AReg_64RegClassID; 2227 case 3: return AMDGPU::AReg_96RegClassID; 2228 case 4: return AMDGPU::AReg_128RegClassID; 2229 case 5: return AMDGPU::AReg_160RegClassID; 2230 case 6: return AMDGPU::AReg_192RegClassID; 2231 case 7: return AMDGPU::AReg_224RegClassID; 2232 case 8: return AMDGPU::AReg_256RegClassID; 2233 case 16: return AMDGPU::AReg_512RegClassID; 2234 case 32: return AMDGPU::AReg_1024RegClassID; 2235 } 2236 } 2237 return -1; 2238 } 2239 2240 static unsigned getSpecialRegForName(StringRef RegName) { 2241 return StringSwitch<unsigned>(RegName) 2242 .Case("exec", 
AMDGPU::EXEC) 2243 .Case("vcc", AMDGPU::VCC) 2244 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2245 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2246 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2247 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2248 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2249 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2250 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2251 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2252 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2253 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2254 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2255 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2256 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2257 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2258 .Case("m0", AMDGPU::M0) 2259 .Case("vccz", AMDGPU::SRC_VCCZ) 2260 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2261 .Case("execz", AMDGPU::SRC_EXECZ) 2262 .Case("src_execz", AMDGPU::SRC_EXECZ) 2263 .Case("scc", AMDGPU::SRC_SCC) 2264 .Case("src_scc", AMDGPU::SRC_SCC) 2265 .Case("tba", AMDGPU::TBA) 2266 .Case("tma", AMDGPU::TMA) 2267 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2268 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2269 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2270 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2271 .Case("vcc_lo", AMDGPU::VCC_LO) 2272 .Case("vcc_hi", AMDGPU::VCC_HI) 2273 .Case("exec_lo", AMDGPU::EXEC_LO) 2274 .Case("exec_hi", AMDGPU::EXEC_HI) 2275 .Case("tma_lo", AMDGPU::TMA_LO) 2276 .Case("tma_hi", AMDGPU::TMA_HI) 2277 .Case("tba_lo", AMDGPU::TBA_LO) 2278 .Case("tba_hi", AMDGPU::TBA_HI) 2279 .Case("pc", AMDGPU::PC_REG) 2280 .Case("null", AMDGPU::SGPR_NULL) 2281 .Default(AMDGPU::NoRegister); 2282 } 2283 2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2285 SMLoc &EndLoc, bool RestoreOnFailure) { 2286 auto R = parseRegister(); 2287 if (!R) return true; 2288 assert(R->isReg()); 2289 RegNo = R->getReg(); 2290 StartLoc = R->getStartLoc(); 2291 EndLoc = R->getEndLoc(); 2292 return false; 2293 } 2294 2295 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2296 SMLoc &EndLoc) { 2297 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2298 } 2299 2300 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2301 SMLoc &StartLoc, 2302 SMLoc &EndLoc) { 2303 bool Result = 2304 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2305 bool PendingErrors = getParser().hasPendingError(); 2306 getParser().clearPendingErrors(); 2307 if (PendingErrors) 2308 return MatchOperand_ParseFail; 2309 if (Result) 2310 return MatchOperand_NoMatch; 2311 return MatchOperand_Success; 2312 } 2313 2314 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2315 RegisterKind RegKind, unsigned Reg1, 2316 SMLoc Loc) { 2317 switch (RegKind) { 2318 case IS_SPECIAL: 2319 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2320 Reg = AMDGPU::EXEC; 2321 RegWidth = 2; 2322 return true; 2323 } 2324 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2325 Reg = AMDGPU::FLAT_SCR; 2326 RegWidth = 2; 2327 return true; 2328 } 2329 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2330 Reg = AMDGPU::XNACK_MASK; 2331 RegWidth = 2; 2332 return true; 2333 } 2334 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2335 Reg = AMDGPU::VCC; 2336 RegWidth = 2; 2337 return true; 2338 } 2339 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2340 Reg = 
AMDGPU::TBA; 2341 RegWidth = 2; 2342 return true; 2343 } 2344 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2345 Reg = AMDGPU::TMA; 2346 RegWidth = 2; 2347 return true; 2348 } 2349 Error(Loc, "register does not fit in the list"); 2350 return false; 2351 case IS_VGPR: 2352 case IS_SGPR: 2353 case IS_AGPR: 2354 case IS_TTMP: 2355 if (Reg1 != Reg + RegWidth) { 2356 Error(Loc, "registers in a list must have consecutive indices"); 2357 return false; 2358 } 2359 RegWidth++; 2360 return true; 2361 default: 2362 llvm_unreachable("unexpected register kind"); 2363 } 2364 } 2365 2366 struct RegInfo { 2367 StringLiteral Name; 2368 RegisterKind Kind; 2369 }; 2370 2371 static constexpr RegInfo RegularRegisters[] = { 2372 {{"v"}, IS_VGPR}, 2373 {{"s"}, IS_SGPR}, 2374 {{"ttmp"}, IS_TTMP}, 2375 {{"acc"}, IS_AGPR}, 2376 {{"a"}, IS_AGPR}, 2377 }; 2378 2379 static bool isRegularReg(RegisterKind Kind) { 2380 return Kind == IS_VGPR || 2381 Kind == IS_SGPR || 2382 Kind == IS_TTMP || 2383 Kind == IS_AGPR; 2384 } 2385 2386 static const RegInfo* getRegularRegInfo(StringRef Str) { 2387 for (const RegInfo &Reg : RegularRegisters) 2388 if (Str.startswith(Reg.Name)) 2389 return &Reg; 2390 return nullptr; 2391 } 2392 2393 static bool getRegNum(StringRef Str, unsigned& Num) { 2394 return !Str.getAsInteger(10, Num); 2395 } 2396 2397 bool 2398 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2399 const AsmToken &NextToken) const { 2400 2401 // A list of consecutive registers: [s0,s1,s2,s3] 2402 if (Token.is(AsmToken::LBrac)) 2403 return true; 2404 2405 if (!Token.is(AsmToken::Identifier)) 2406 return false; 2407 2408 // A single register like s0 or a range of registers like s[0:1] 2409 2410 StringRef Str = Token.getString(); 2411 const RegInfo *Reg = getRegularRegInfo(Str); 2412 if (Reg) { 2413 StringRef RegName = Reg->Name; 2414 StringRef RegSuffix = Str.substr(RegName.size()); 2415 if (!RegSuffix.empty()) { 2416 unsigned Num; 2417 // A single register with an index: rXX 2418 if (getRegNum(RegSuffix, Num)) 2419 return true; 2420 } else { 2421 // A range of registers: r[XX:YY]. 2422 if (NextToken.is(AsmToken::LBrac)) 2423 return true; 2424 } 2425 } 2426 2427 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2428 } 2429 2430 bool 2431 AMDGPUAsmParser::isRegister() 2432 { 2433 return isRegister(getToken(), peekToken()); 2434 } 2435 2436 unsigned 2437 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2438 unsigned RegNum, 2439 unsigned RegWidth, 2440 SMLoc Loc) { 2441 2442 assert(isRegularReg(RegKind)); 2443 2444 unsigned AlignSize = 1; 2445 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2446 // SGPR and TTMP registers must be aligned. 2447 // Max required alignment is 4 dwords. 
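    // Illustrative examples:
    //   s[2:3] -> RegWidth = 2, AlignSize = 2, 2 % 2 == 0 -> accepted
    //   s[3:4] -> RegWidth = 2, 3 % 2 != 0 -> "invalid register alignment"
    //   s[6:9] -> RegWidth = 4, AlignSize = 4, 6 % 4 != 0 -> rejected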
2448 AlignSize = std::min(RegWidth, 4u); 2449 } 2450 2451 if (RegNum % AlignSize != 0) { 2452 Error(Loc, "invalid register alignment"); 2453 return AMDGPU::NoRegister; 2454 } 2455 2456 unsigned RegIdx = RegNum / AlignSize; 2457 int RCID = getRegClass(RegKind, RegWidth); 2458 if (RCID == -1) { 2459 Error(Loc, "invalid or unsupported register size"); 2460 return AMDGPU::NoRegister; 2461 } 2462 2463 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2464 const MCRegisterClass RC = TRI->getRegClass(RCID); 2465 if (RegIdx >= RC.getNumRegs()) { 2466 Error(Loc, "register index is out of range"); 2467 return AMDGPU::NoRegister; 2468 } 2469 2470 return RC.getRegister(RegIdx); 2471 } 2472 2473 bool 2474 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2475 int64_t RegLo, RegHi; 2476 if (!skipToken(AsmToken::LBrac, "missing register index")) 2477 return false; 2478 2479 SMLoc FirstIdxLoc = getLoc(); 2480 SMLoc SecondIdxLoc; 2481 2482 if (!parseExpr(RegLo)) 2483 return false; 2484 2485 if (trySkipToken(AsmToken::Colon)) { 2486 SecondIdxLoc = getLoc(); 2487 if (!parseExpr(RegHi)) 2488 return false; 2489 } else { 2490 RegHi = RegLo; 2491 } 2492 2493 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2494 return false; 2495 2496 if (!isUInt<32>(RegLo)) { 2497 Error(FirstIdxLoc, "invalid register index"); 2498 return false; 2499 } 2500 2501 if (!isUInt<32>(RegHi)) { 2502 Error(SecondIdxLoc, "invalid register index"); 2503 return false; 2504 } 2505 2506 if (RegLo > RegHi) { 2507 Error(FirstIdxLoc, "first register index should not exceed second index"); 2508 return false; 2509 } 2510 2511 Num = static_cast<unsigned>(RegLo); 2512 Width = (RegHi - RegLo) + 1; 2513 return true; 2514 } 2515 2516 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2517 unsigned &RegNum, unsigned &RegWidth, 2518 SmallVectorImpl<AsmToken> &Tokens) { 2519 assert(isToken(AsmToken::Identifier)); 2520 unsigned Reg = getSpecialRegForName(getTokenStr()); 2521 if (Reg) { 2522 RegNum = 0; 2523 RegWidth = 1; 2524 RegKind = IS_SPECIAL; 2525 Tokens.push_back(getToken()); 2526 lex(); // skip register name 2527 } 2528 return Reg; 2529 } 2530 2531 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2532 unsigned &RegNum, unsigned &RegWidth, 2533 SmallVectorImpl<AsmToken> &Tokens) { 2534 assert(isToken(AsmToken::Identifier)); 2535 StringRef RegName = getTokenStr(); 2536 auto Loc = getLoc(); 2537 2538 const RegInfo *RI = getRegularRegInfo(RegName); 2539 if (!RI) { 2540 Error(Loc, "invalid register name"); 2541 return AMDGPU::NoRegister; 2542 } 2543 2544 Tokens.push_back(getToken()); 2545 lex(); // skip register name 2546 2547 RegKind = RI->Kind; 2548 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2549 if (!RegSuffix.empty()) { 2550 // Single 32-bit register: vXX. 2551 if (!getRegNum(RegSuffix, RegNum)) { 2552 Error(Loc, "invalid register index"); 2553 return AMDGPU::NoRegister; 2554 } 2555 RegWidth = 1; 2556 } else { 2557 // Range of registers: v[XX:YY]. ":YY" is optional. 
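    // Illustrative examples:
    //   v[4:7] -> RegNum = 4, RegWidth = 4
    //   s[10]  -> RegNum = 10, RegWidth = 1
    //   v[7:4] -> "first register index should not exceed second index"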
2558 if (!ParseRegRange(RegNum, RegWidth)) 2559 return AMDGPU::NoRegister; 2560 } 2561 2562 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2563 } 2564 2565 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2566 unsigned &RegWidth, 2567 SmallVectorImpl<AsmToken> &Tokens) { 2568 unsigned Reg = AMDGPU::NoRegister; 2569 auto ListLoc = getLoc(); 2570 2571 if (!skipToken(AsmToken::LBrac, 2572 "expected a register or a list of registers")) { 2573 return AMDGPU::NoRegister; 2574 } 2575 2576 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2577 2578 auto Loc = getLoc(); 2579 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2580 return AMDGPU::NoRegister; 2581 if (RegWidth != 1) { 2582 Error(Loc, "expected a single 32-bit register"); 2583 return AMDGPU::NoRegister; 2584 } 2585 2586 for (; trySkipToken(AsmToken::Comma); ) { 2587 RegisterKind NextRegKind; 2588 unsigned NextReg, NextRegNum, NextRegWidth; 2589 Loc = getLoc(); 2590 2591 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2592 NextRegNum, NextRegWidth, 2593 Tokens)) { 2594 return AMDGPU::NoRegister; 2595 } 2596 if (NextRegWidth != 1) { 2597 Error(Loc, "expected a single 32-bit register"); 2598 return AMDGPU::NoRegister; 2599 } 2600 if (NextRegKind != RegKind) { 2601 Error(Loc, "registers in a list must be of the same kind"); 2602 return AMDGPU::NoRegister; 2603 } 2604 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 if (!skipToken(AsmToken::RBrac, 2609 "expected a comma or a closing square bracket")) { 2610 return AMDGPU::NoRegister; 2611 } 2612 2613 if (isRegularReg(RegKind)) 2614 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2615 2616 return Reg; 2617 } 2618 2619 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2620 unsigned &RegNum, unsigned &RegWidth, 2621 SmallVectorImpl<AsmToken> &Tokens) { 2622 auto Loc = getLoc(); 2623 Reg = AMDGPU::NoRegister; 2624 2625 if (isToken(AsmToken::Identifier)) { 2626 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2627 if (Reg == AMDGPU::NoRegister) 2628 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2629 } else { 2630 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2631 } 2632 2633 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2634 if (Reg == AMDGPU::NoRegister) { 2635 assert(Parser.hasPendingError()); 2636 return false; 2637 } 2638 2639 if (!subtargetHasRegister(*TRI, Reg)) { 2640 if (Reg == AMDGPU::SGPR_NULL) { 2641 Error(Loc, "'null' operand is not supported on this GPU"); 2642 } else { 2643 Error(Loc, "register not available on this GPU"); 2644 } 2645 return false; 2646 } 2647 2648 return true; 2649 } 2650 2651 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2652 unsigned &RegNum, unsigned &RegWidth, 2653 bool RestoreOnFailure /*=false*/) { 2654 Reg = AMDGPU::NoRegister; 2655 2656 SmallVector<AsmToken, 1> Tokens; 2657 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2658 if (RestoreOnFailure) { 2659 while (!Tokens.empty()) { 2660 getLexer().UnLex(Tokens.pop_back_val()); 2661 } 2662 } 2663 return true; 2664 } 2665 return false; 2666 } 2667 2668 Optional<StringRef> 2669 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2670 switch (RegKind) { 2671 case IS_VGPR: 2672 return StringRef(".amdgcn.next_free_vgpr"); 2673 case IS_SGPR: 2674 return StringRef(".amdgcn.next_free_sgpr"); 2675 default: 2676 return None; 2677 } 2678 } 2679 2680 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2681 auto SymbolName = getGprCountSymbolName(RegKind); 2682 assert(SymbolName && "initializing invalid register kind"); 2683 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2684 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2685 } 2686 2687 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2688 unsigned DwordRegIndex, 2689 unsigned RegWidth) { 2690 // Symbols are only defined for GCN targets 2691 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2692 return true; 2693 2694 auto SymbolName = getGprCountSymbolName(RegKind); 2695 if (!SymbolName) 2696 return true; 2697 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2698 2699 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2700 int64_t OldCount; 2701 2702 if (!Sym->isVariable()) 2703 return !Error(getLoc(), 2704 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2705 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2706 return !Error( 2707 getLoc(), 2708 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2709 2710 if (OldCount <= NewMax) 2711 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2712 2713 return true; 2714 } 2715 2716 std::unique_ptr<AMDGPUOperand> 2717 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2718 const auto &Tok = getToken(); 2719 SMLoc StartLoc = Tok.getLoc(); 2720 SMLoc EndLoc = Tok.getEndLoc(); 2721 RegisterKind RegKind; 2722 unsigned Reg, RegNum, RegWidth; 2723 2724 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2725 return nullptr; 2726 } 2727 if (isHsaAbiVersion3Or4(&getSTI())) { 2728 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2729 return nullptr; 2730 } else 2731 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2732 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2733 } 2734 2735 OperandMatchResultTy 2736 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2737 // TODO: add syntactic sugar for 1/(2*PI) 2738 2739 assert(!isRegister()); 2740 assert(!isModifier()); 2741 2742 const auto& Tok = getToken(); 2743 const auto& NextTok = peekToken(); 2744 bool IsReal = Tok.is(AsmToken::Real); 2745 SMLoc S = getLoc(); 2746 bool Negate = false; 2747 2748 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2749 lex(); 2750 IsReal = true; 2751 Negate = true; 2752 } 2753 2754 if (IsReal) { 2755 // Floating-point expressions are not supported. 2756 // Can only allow floating-point literals with an 2757 // optional sign. 2758 2759 StringRef Num = getTokenStr(); 2760 lex(); 2761 2762 APFloat RealVal(APFloat::IEEEdouble()); 2763 auto roundMode = APFloat::rmNearestTiesToEven; 2764 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2765 return MatchOperand_ParseFail; 2766 } 2767 if (Negate) 2768 RealVal.changeSign(); 2769 2770 Operands.push_back( 2771 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2772 AMDGPUOperand::ImmTyNone, true)); 2773 2774 return MatchOperand_Success; 2775 2776 } else { 2777 int64_t IntVal; 2778 const MCExpr *Expr; 2779 SMLoc S = getLoc(); 2780 2781 if (HasSP3AbsModifier) { 2782 // This is a workaround for handling expressions 2783 // as arguments of SP3 'abs' modifier, for example: 2784 // |1.0| 2785 // |-1| 2786 // |1+x| 2787 // This syntax is not compatible with syntax of standard 2788 // MC expressions (due to the trailing '|'). 
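      // For example (illustrative): parseExpression() would treat the closing
      // '|' of "|-1|" as a bitwise-OR operator and keep parsing past the
      // intended end of the operand, while parsePrimaryExpr() stops after the
      // primary expression and leaves the trailing '|' for the caller.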
2789 SMLoc EndLoc; 2790 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2791 return MatchOperand_ParseFail; 2792 } else { 2793 if (Parser.parseExpression(Expr)) 2794 return MatchOperand_ParseFail; 2795 } 2796 2797 if (Expr->evaluateAsAbsolute(IntVal)) { 2798 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2799 } else { 2800 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2801 } 2802 2803 return MatchOperand_Success; 2804 } 2805 2806 return MatchOperand_NoMatch; 2807 } 2808 2809 OperandMatchResultTy 2810 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2811 if (!isRegister()) 2812 return MatchOperand_NoMatch; 2813 2814 if (auto R = parseRegister()) { 2815 assert(R->isReg()); 2816 Operands.push_back(std::move(R)); 2817 return MatchOperand_Success; 2818 } 2819 return MatchOperand_ParseFail; 2820 } 2821 2822 OperandMatchResultTy 2823 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2824 auto res = parseReg(Operands); 2825 if (res != MatchOperand_NoMatch) { 2826 return res; 2827 } else if (isModifier()) { 2828 return MatchOperand_NoMatch; 2829 } else { 2830 return parseImm(Operands, HasSP3AbsMod); 2831 } 2832 } 2833 2834 bool 2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2836 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2837 const auto &str = Token.getString(); 2838 return str == "abs" || str == "neg" || str == "sext"; 2839 } 2840 return false; 2841 } 2842 2843 bool 2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2845 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2846 } 2847 2848 bool 2849 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2850 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2851 } 2852 2853 bool 2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2855 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2856 } 2857 2858 // Check if this is an operand modifier or an opcode modifier 2859 // which may look like an expression but it is not. We should 2860 // avoid parsing these modifiers as expressions. Currently 2861 // recognized sequences are: 2862 // |...| 2863 // abs(...) 2864 // neg(...) 2865 // sext(...) 2866 // -reg 2867 // -|...| 2868 // -abs(...) 2869 // name:... 2870 // Note that simple opcode modifiers like 'gds' may be parsed as 2871 // expressions; this is a special case. See getExpressionAsToken. 2872 // 2873 bool 2874 AMDGPUAsmParser::isModifier() { 2875 2876 AsmToken Tok = getToken(); 2877 AsmToken NextToken[2]; 2878 peekTokens(NextToken); 2879 2880 return isOperandModifier(Tok, NextToken[0]) || 2881 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2882 isOpcodeModifierWithVal(Tok, NextToken[0]); 2883 } 2884 2885 // Check if the current token is an SP3 'neg' modifier. 2886 // Currently this modifier is allowed in the following context: 2887 // 2888 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2889 // 2. Before an 'abs' modifier: -abs(...) 2890 // 3. Before an SP3 'abs' modifier: -|...| 2891 // 2892 // In all other cases "-" is handled as a part 2893 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2967 } 2968 2969 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2970 return MatchOperand_ParseFail; 2971 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2972 return MatchOperand_ParseFail; 2973 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2974 return MatchOperand_ParseFail; 2975 2976 AMDGPUOperand::Modifiers Mods; 2977 Mods.Abs = Abs || SP3Abs; 2978 Mods.Neg = Neg || SP3Neg; 2979 2980 if (Mods.hasFPModifiers()) { 2981 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2982 if (Op.isExpr()) { 2983 Error(Op.getStartLoc(), "expected an absolute expression"); 2984 return MatchOperand_ParseFail; 2985 } 2986 Op.setModifiers(Mods); 2987 } 2988 return MatchOperand_Success; 2989 } 2990 2991 OperandMatchResultTy 2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2993 bool AllowImm) { 2994 bool Sext = trySkipId("sext"); 2995 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2996 return MatchOperand_ParseFail; 2997 2998 OperandMatchResultTy Res; 2999 if (AllowImm) { 3000 Res = parseRegOrImm(Operands); 3001 } else { 3002 Res = parseReg(Operands); 3003 } 3004 if (Res != MatchOperand_Success) { 3005 return Sext? MatchOperand_ParseFail : Res; 3006 } 3007 3008 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3009 return MatchOperand_ParseFail; 3010 3011 AMDGPUOperand::Modifiers Mods; 3012 Mods.Sext = Sext; 3013 3014 if (Mods.hasIntModifiers()) { 3015 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3016 if (Op.isExpr()) { 3017 Error(Op.getStartLoc(), "expected an absolute expression"); 3018 return MatchOperand_ParseFail; 3019 } 3020 Op.setModifiers(Mods); 3021 } 3022 3023 return MatchOperand_Success; 3024 } 3025 3026 OperandMatchResultTy 3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3028 return parseRegOrImmWithFPInputMods(Operands, false); 3029 } 3030 3031 OperandMatchResultTy 3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3033 return parseRegOrImmWithIntInputMods(Operands, false); 3034 } 3035 3036 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3037 auto Loc = getLoc(); 3038 if (trySkipId("off")) { 3039 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3040 AMDGPUOperand::ImmTyOff, false)); 3041 return MatchOperand_Success; 3042 } 3043 3044 if (!isRegister()) 3045 return MatchOperand_NoMatch; 3046 3047 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3048 if (Reg) { 3049 Operands.push_back(std::move(Reg)); 3050 return MatchOperand_Success; 3051 } 3052 3053 return MatchOperand_ParseFail; 3054 3055 } 3056 3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3058 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3059 3060 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3061 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3062 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3063 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3064 return Match_InvalidOperand; 3065 3066 if ((TSFlags & SIInstrFlags::VOP3) && 3067 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3068 getForcedEncodingSize() != 64) 3069 return Match_PreferE32; 3070 3071 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3072 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3073 // v_mac_f32/16 allow only dst_sel == DWORD; 3074 auto OpNum = 3075 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3076 const auto &Op = Inst.getOperand(OpNum); 3077 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3078 return Match_InvalidOperand; 3079 } 3080 } 3081 3082 return Match_Success; 3083 } 3084 3085 static ArrayRef<unsigned> getAllVariants() { 3086 static const unsigned Variants[] = { 3087 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3088 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3089 }; 3090 3091 return makeArrayRef(Variants); 3092 } 3093 3094 // What asm variants we should check 3095 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3096 if (getForcedEncodingSize() == 32) { 3097 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3098 return makeArrayRef(Variants); 3099 } 3100 3101 if (isForcedVOP3()) { 3102 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3103 return makeArrayRef(Variants); 3104 } 3105 3106 if (isForcedSDWA()) { 3107 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3108 AMDGPUAsmVariants::SDWA9}; 3109 return makeArrayRef(Variants); 3110 } 3111 3112 if (isForcedDPP()) { 3113 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3114 return makeArrayRef(Variants); 3115 } 3116 3117 return getAllVariants(); 3118 } 3119 3120 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3121 if (getForcedEncodingSize() == 32) 3122 return "e32"; 3123 3124 if (isForcedVOP3()) 3125 return "e64"; 3126 3127 if (isForcedSDWA()) 3128 return "sdwa"; 3129 3130 if (isForcedDPP()) 3131 return "dpp"; 3132 3133 return ""; 3134 } 3135 3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3137 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3138 const unsigned Num = Desc.getNumImplicitUses(); 3139 for (unsigned i = 0; i < Num; ++i) { 3140 unsigned Reg = Desc.ImplicitUses[i]; 3141 switch (Reg) { 3142 case AMDGPU::FLAT_SCR: 3143 case AMDGPU::VCC: 3144 case AMDGPU::VCC_LO: 3145 case AMDGPU::VCC_HI: 3146 case AMDGPU::M0: 3147 return Reg; 3148 default: 3149 break; 3150 } 3151 } 3152 return AMDGPU::NoRegister; 3153 } 3154 3155 // NB: This code is correct only when used to check constant 3156 // bus limitations because GFX7 support no f16 inline constants. 3157 // Note that there are no cases when a GFX7 opcode violates 3158 // constant bus limitations due to the use of an f16 constant. 
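// Illustrative (assumed, 4-byte operand): 1.0 (0x3f800000) and -16 are inline
// constants and do not count against the constant bus, while 0x12345678 is a
// literal and does.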
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3160 unsigned OpIdx) const { 3161 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3162 3163 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3164 return false; 3165 } 3166 3167 const MCOperand &MO = Inst.getOperand(OpIdx); 3168 3169 int64_t Val = MO.getImm(); 3170 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3171 3172 switch (OpSize) { // expected operand size 3173 case 8: 3174 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3175 case 4: 3176 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3177 case 2: { 3178 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3179 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3180 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3181 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3182 return AMDGPU::isInlinableIntLiteral(Val); 3183 3184 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3185 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3186 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3187 return AMDGPU::isInlinableIntLiteralV216(Val); 3188 3189 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3190 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3191 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3192 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3193 3194 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3195 } 3196 default: 3197 llvm_unreachable("invalid operand size"); 3198 } 3199 } 3200 3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3202 if (!isGFX10Plus()) 3203 return 1; 3204 3205 switch (Opcode) { 3206 // 64-bit shift instructions can use only one scalar value input 3207 case AMDGPU::V_LSHLREV_B64_e64: 3208 case AMDGPU::V_LSHLREV_B64_gfx10: 3209 case AMDGPU::V_LSHRREV_B64_e64: 3210 case AMDGPU::V_LSHRREV_B64_gfx10: 3211 case AMDGPU::V_ASHRREV_I64_e64: 3212 case AMDGPU::V_ASHRREV_I64_gfx10: 3213 case AMDGPU::V_LSHL_B64_e64: 3214 case AMDGPU::V_LSHR_B64_e64: 3215 case AMDGPU::V_ASHR_I64_e64: 3216 return 1; 3217 default: 3218 return 2; 3219 } 3220 } 3221 3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3223 const MCOperand &MO = Inst.getOperand(OpIdx); 3224 if (MO.isImm()) { 3225 return !isInlineConstant(Inst, OpIdx); 3226 } else if (MO.isReg()) { 3227 auto Reg = MO.getReg(); 3228 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3229 auto PReg = mc2PseudoReg(Reg); 3230 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3231 } else { 3232 return true; 3233 } 3234 } 3235 3236 bool 3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3238 const OperandVector &Operands) { 3239 const unsigned Opcode = Inst.getOpcode(); 3240 const MCInstrDesc &Desc = MII.get(Opcode); 3241 unsigned LastSGPR = AMDGPU::NoRegister; 3242 unsigned ConstantBusUseCount = 0; 3243 unsigned NumLiterals = 0; 3244 unsigned LiteralSize; 3245 3246 if (Desc.TSFlags & 3247 (SIInstrFlags::VOPC | 3248 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3249 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3250 SIInstrFlags::SDWA)) { 3251 // Check special imm operands (used by madmk, etc) 3252 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3253 ++ConstantBusUseCount; 3254 } 3255 3256 SmallDenseSet<unsigned> SGPRsUsed; 3257 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3258 if (SGPRUsed != AMDGPU::NoRegister) { 3259 SGPRsUsed.insert(SGPRUsed); 3260 ++ConstantBusUseCount; 3261 } 3262 3263 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3321 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3322 return false; 3323 } 3324 3325 bool 3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3327 const OperandVector &Operands) { 3328 const unsigned Opcode = Inst.getOpcode(); 3329 const MCInstrDesc &Desc = MII.get(Opcode); 3330 3331 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3332 if (DstIdx == -1 || 3333 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3334 return true; 3335 } 3336 3337 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3338 3339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3340 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3341 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3342 3343 assert(DstIdx != -1); 3344 const MCOperand &Dst = Inst.getOperand(DstIdx); 3345 assert(Dst.isReg()); 3346 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3347 3348 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3349 3350 for (int SrcIdx : SrcIndices) { 3351 if (SrcIdx == -1) break; 3352 const MCOperand &Src = Inst.getOperand(SrcIdx); 3353 if (Src.isReg()) { 3354 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3355 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3356 Error(getRegLoc(SrcReg, Operands), 3357 "destination must be different than all sources"); 3358 return false; 3359 } 3360 } 3361 } 3362 3363 return true; 3364 } 3365 3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3367 3368 const unsigned Opc = Inst.getOpcode(); 3369 const MCInstrDesc &Desc = MII.get(Opc); 3370 3371 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3372 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3373 assert(ClampIdx != -1); 3374 return Inst.getOperand(ClampIdx).getImm() == 0; 3375 } 3376 3377 return true; 3378 } 3379 3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3381 3382 const unsigned Opc = Inst.getOpcode(); 3383 const MCInstrDesc &Desc = MII.get(Opc); 3384 3385 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3386 return true; 3387 3388 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3389 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3390 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3391 3392 assert(VDataIdx != -1); 3393 3394 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3395 return true; 3396 3397 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3398 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3399 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3400 if (DMask == 0) 3401 DMask = 1; 3402 3403 unsigned DataSize = 3404 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3405 if (hasPackedD16()) { 3406 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3407 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3408 DataSize = (DataSize + 1) / 2; 3409 } 3410 3411 return (VDataSize / 4) == DataSize + TFESize; 3412 } 3413 3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3415 const unsigned Opc = Inst.getOpcode(); 3416 const MCInstrDesc &Desc = MII.get(Opc); 3417 3418 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3419 return true; 3420 3421 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3422 3423 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3424 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3425 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3426 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3427 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3428 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3429 3430 assert(VAddr0Idx != -1); 3431 assert(SrsrcIdx != -1); 3432 assert(SrsrcIdx > VAddr0Idx); 3433 3434 if (DimIdx == -1) 3435 return true; // intersect_ray 3436 3437 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3438 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3439 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3440 unsigned ActualAddrSize = 3441 IsNSA ? SrsrcIdx - VAddr0Idx 3442 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3443 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3444 3445 unsigned ExpectedAddrSize = 3446 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3447 3448 if (!IsNSA) { 3449 if (ExpectedAddrSize > 8) 3450 ExpectedAddrSize = 16; 3451 else if (ExpectedAddrSize > 5) 3452 ExpectedAddrSize = 8; 3453 3454 // Allow oversized 8 VGPR vaddr when only 5 VGPR are required. 3455 // This provides backward compatibility for assembly created 3456 // before 160b types were directly supported. 3457 if (ExpectedAddrSize == 5 && ActualAddrSize == 8) 3458 return true; 3459 } 3460 3461 return ActualAddrSize == ExpectedAddrSize; 3462 } 3463 3464 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3465 3466 const unsigned Opc = Inst.getOpcode(); 3467 const MCInstrDesc &Desc = MII.get(Opc); 3468 3469 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3470 return true; 3471 if (!Desc.mayLoad() || !Desc.mayStore()) 3472 return true; // Not atomic 3473 3474 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3475 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3476 3477 // This is an incomplete check because image_atomic_cmpswap 3478 // may only use 0x3 and 0xf while other atomic operations 3479 // may use 0x1 and 0x3. However these limitations are 3480 // verified when we check that dmask matches dst size. 3481 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3482 } 3483 3484 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3485 3486 const unsigned Opc = Inst.getOpcode(); 3487 const MCInstrDesc &Desc = MII.get(Opc); 3488 3489 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3490 return true; 3491 3492 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3493 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3494 3495 // GATHER4 instructions use dmask in a different fashion compared to 3496 // other MIMG instructions. The only useful DMASK values are 3497 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 
1 returns 3498 // (red,red,red,red) etc.) The ISA document doesn't mention 3499 // this. 3500 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3501 } 3502 3503 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3504 const unsigned Opc = Inst.getOpcode(); 3505 const MCInstrDesc &Desc = MII.get(Opc); 3506 3507 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3508 return true; 3509 3510 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3511 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3512 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3513 3514 if (!BaseOpcode->MSAA) 3515 return true; 3516 3517 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3518 assert(DimIdx != -1); 3519 3520 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3521 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3522 3523 return DimInfo->MSAA; 3524 } 3525 3526 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3527 { 3528 switch (Opcode) { 3529 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3530 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3531 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3532 return true; 3533 default: 3534 return false; 3535 } 3536 } 3537 3538 // movrels* opcodes should only allow VGPRS as src0. 3539 // This is specified in .td description for vop1/vop3, 3540 // but sdwa is handled differently. See isSDWAOperand. 3541 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3542 const OperandVector &Operands) { 3543 3544 const unsigned Opc = Inst.getOpcode(); 3545 const MCInstrDesc &Desc = MII.get(Opc); 3546 3547 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3548 return true; 3549 3550 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3551 assert(Src0Idx != -1); 3552 3553 SMLoc ErrLoc; 3554 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3555 if (Src0.isReg()) { 3556 auto Reg = mc2PseudoReg(Src0.getReg()); 3557 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3558 if (!isSGPR(Reg, TRI)) 3559 return true; 3560 ErrLoc = getRegLoc(Reg, Operands); 3561 } else { 3562 ErrLoc = getConstLoc(Operands); 3563 } 3564 3565 Error(ErrLoc, "source operand must be a VGPR"); 3566 return false; 3567 } 3568 3569 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3570 const OperandVector &Operands) { 3571 3572 const unsigned Opc = Inst.getOpcode(); 3573 3574 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3575 return true; 3576 3577 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3578 assert(Src0Idx != -1); 3579 3580 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3581 if (!Src0.isReg()) 3582 return true; 3583 3584 auto Reg = mc2PseudoReg(Src0.getReg()); 3585 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3586 if (isSGPR(Reg, TRI)) { 3587 Error(getRegLoc(Reg, Operands), 3588 "source operand must be either a VGPR or an inline constant"); 3589 return false; 3590 } 3591 3592 return true; 3593 } 3594 3595 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3596 switch (Inst.getOpcode()) { 3597 default: 3598 return true; 3599 case V_DIV_SCALE_F32_gfx6_gfx7: 3600 case V_DIV_SCALE_F32_vi: 3601 case V_DIV_SCALE_F32_gfx10: 3602 case V_DIV_SCALE_F64_gfx6_gfx7: 3603 case V_DIV_SCALE_F64_vi: 3604 case V_DIV_SCALE_F64_gfx10: 3605 break; 3606 } 3607 3608 // TODO: Check that src0 = src1 or src2. 
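  // For example (assuming the usual VOP3 syntax with SP3 modifiers),
  // "v_div_scale_f32 v0, vcc, |v1|, v2, v3" would be rejected here because
  // the abs modifier sets SISrcMods::ABS in src0_modifiers.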
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}

Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
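  // Illustrative examples (assumed asm spelling of the register):
  //   v_mov_b32 v0, lds_direct        // accepted: lds_direct is src0
  //   v_add_f32 v0, v1, lds_direct    // rejected: lds_direct may only be src0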
3795 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3796 if ((Desc.TSFlags & Enc) == 0) 3797 return None; 3798 3799 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3800 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3801 if (SrcIdx == -1) 3802 break; 3803 const auto &Src = Inst.getOperand(SrcIdx); 3804 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3805 3806 if (isGFX90A()) 3807 return StringRef("lds_direct is not supported on this GPU"); 3808 3809 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3810 return StringRef("lds_direct cannot be used with this instruction"); 3811 3812 if (SrcName != OpName::src0) 3813 return StringRef("lds_direct may be used as src0 only"); 3814 } 3815 } 3816 3817 return None; 3818 } 3819 3820 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3821 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3822 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3823 if (Op.isFlatOffset()) 3824 return Op.getStartLoc(); 3825 } 3826 return getLoc(); 3827 } 3828 3829 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3830 const OperandVector &Operands) { 3831 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3832 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3833 return true; 3834 3835 auto Opcode = Inst.getOpcode(); 3836 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3837 assert(OpNum != -1); 3838 3839 const auto &Op = Inst.getOperand(OpNum); 3840 if (!hasFlatOffsets() && Op.getImm() != 0) { 3841 Error(getFlatOffsetLoc(Operands), 3842 "flat offset modifier is not supported on this GPU"); 3843 return false; 3844 } 3845 3846 // For FLAT segment the offset must be positive; 3847 // MSB is ignored and forced to zero. 3848 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3849 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3850 if (!isIntN(OffsetSize, Op.getImm())) { 3851 Error(getFlatOffsetLoc(Operands), 3852 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3853 return false; 3854 } 3855 } else { 3856 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3857 if (!isUIntN(OffsetSize, Op.getImm())) { 3858 Error(getFlatOffsetLoc(Operands), 3859 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3860 return false; 3861 } 3862 } 3863 3864 return true; 3865 } 3866 3867 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3868 // Start with second operand because SMEM Offset cannot be dst or src0. 
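  // Operands[0] holds the mnemonic token and Operands[1] the dst/src0
  // register, so the earliest slot an smem offset can occupy is index 2.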
3869 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3870 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3871 if (Op.isSMEMOffset()) 3872 return Op.getStartLoc(); 3873 } 3874 return getLoc(); 3875 } 3876 3877 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3878 const OperandVector &Operands) { 3879 if (isCI() || isSI()) 3880 return true; 3881 3882 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3883 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3884 return true; 3885 3886 auto Opcode = Inst.getOpcode(); 3887 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3888 if (OpNum == -1) 3889 return true; 3890 3891 const auto &Op = Inst.getOperand(OpNum); 3892 if (!Op.isImm()) 3893 return true; 3894 3895 uint64_t Offset = Op.getImm(); 3896 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3897 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3898 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3899 return true; 3900 3901 Error(getSMEMOffsetLoc(Operands), 3902 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3903 "expected a 21-bit signed offset"); 3904 3905 return false; 3906 } 3907 3908 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3909 unsigned Opcode = Inst.getOpcode(); 3910 const MCInstrDesc &Desc = MII.get(Opcode); 3911 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3912 return true; 3913 3914 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3915 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3916 3917 const int OpIndices[] = { Src0Idx, Src1Idx }; 3918 3919 unsigned NumExprs = 0; 3920 unsigned NumLiterals = 0; 3921 uint32_t LiteralValue; 3922 3923 for (int OpIdx : OpIndices) { 3924 if (OpIdx == -1) break; 3925 3926 const MCOperand &MO = Inst.getOperand(OpIdx); 3927 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3928 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3929 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3930 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3931 if (NumLiterals == 0 || LiteralValue != Value) { 3932 LiteralValue = Value; 3933 ++NumLiterals; 3934 } 3935 } else if (MO.isExpr()) { 3936 ++NumExprs; 3937 } 3938 } 3939 } 3940 3941 return NumLiterals + NumExprs <= 1; 3942 } 3943 3944 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3945 const unsigned Opc = Inst.getOpcode(); 3946 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3947 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3948 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3949 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3950 3951 if (OpSel & ~3) 3952 return false; 3953 } 3954 return true; 3955 } 3956 3957 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3958 const OperandVector &Operands) { 3959 const unsigned Opc = Inst.getOpcode(); 3960 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3961 if (DppCtrlIdx < 0) 3962 return true; 3963 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3964 3965 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3966 // DPP64 is supported for row_newbcast only. 
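    // Illustrative example (assumed syntax): with 64-bit operands, a dpp_ctrl
    // of quad_perm:[0,1,2,3] is rejected here, while row_newbcast:1 on the
    // same operands is accepted.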
3967 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3968 if (Src0Idx >= 0 && 3969 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3970 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3971 Error(S, "64 bit dpp only supports row_newbcast"); 3972 return false; 3973 } 3974 } 3975 3976 return true; 3977 } 3978 3979 // Check if VCC register matches wavefront size 3980 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3981 auto FB = getFeatureBits(); 3982 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3983 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3984 } 3985 3986 // VOP3 literal is only allowed in GFX10+ and only one can be used 3987 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3988 const OperandVector &Operands) { 3989 unsigned Opcode = Inst.getOpcode(); 3990 const MCInstrDesc &Desc = MII.get(Opcode); 3991 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3992 return true; 3993 3994 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3995 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3996 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3997 3998 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3999 4000 unsigned NumExprs = 0; 4001 unsigned NumLiterals = 0; 4002 uint32_t LiteralValue; 4003 4004 for (int OpIdx : OpIndices) { 4005 if (OpIdx == -1) break; 4006 4007 const MCOperand &MO = Inst.getOperand(OpIdx); 4008 if (!MO.isImm() && !MO.isExpr()) 4009 continue; 4010 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4011 continue; 4012 4013 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4014 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4015 Error(getConstLoc(Operands), 4016 "inline constants are not allowed for this operand"); 4017 return false; 4018 } 4019 4020 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4021 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4022 if (NumLiterals == 0 || LiteralValue != Value) { 4023 LiteralValue = Value; 4024 ++NumLiterals; 4025 } 4026 } else if (MO.isExpr()) { 4027 ++NumExprs; 4028 } 4029 } 4030 NumLiterals += NumExprs; 4031 4032 if (!NumLiterals) 4033 return true; 4034 4035 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4036 Error(getLitLoc(Operands), "literal operands are not supported"); 4037 return false; 4038 } 4039 4040 if (NumLiterals > 1) { 4041 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4042 return false; 4043 } 4044 4045 return true; 4046 } 4047 4048 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4049 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4050 const MCRegisterInfo *MRI) { 4051 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4052 if (OpIdx < 0) 4053 return -1; 4054 4055 const MCOperand &Op = Inst.getOperand(OpIdx); 4056 if (!Op.isReg()) 4057 return -1; 4058 4059 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4060 auto Reg = Sub ? Sub : Op.getReg(); 4061 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4062 return AGPR32.contains(Reg) ? 
1 : 0; 4063 } 4064 4065 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4066 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4067 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4068 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4069 SIInstrFlags::DS)) == 0) 4070 return true; 4071 4072 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4073 : AMDGPU::OpName::vdata; 4074 4075 const MCRegisterInfo *MRI = getMRI(); 4076 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4077 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4078 4079 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4080 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4081 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4082 return false; 4083 } 4084 4085 auto FB = getFeatureBits(); 4086 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4087 if (DataAreg < 0 || DstAreg < 0) 4088 return true; 4089 return DstAreg == DataAreg; 4090 } 4091 4092 return DstAreg < 1 && DataAreg < 1; 4093 } 4094 4095 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4096 auto FB = getFeatureBits(); 4097 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4098 return true; 4099 4100 const MCRegisterInfo *MRI = getMRI(); 4101 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4102 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4103 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4104 const MCOperand &Op = Inst.getOperand(I); 4105 if (!Op.isReg()) 4106 continue; 4107 4108 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4109 if (!Sub) 4110 continue; 4111 4112 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4113 return false; 4114 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4115 return false; 4116 } 4117 4118 return true; 4119 } 4120 4121 // gfx90a has an undocumented limitation: 4122 // DS_GWS opcodes must use even aligned registers. 4123 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4124 const OperandVector &Operands) { 4125 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4126 return true; 4127 4128 int Opc = Inst.getOpcode(); 4129 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4130 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4131 return true; 4132 4133 const MCRegisterInfo *MRI = getMRI(); 4134 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4135 int Data0Pos = 4136 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4137 assert(Data0Pos != -1); 4138 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4139 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4140 if (RegIdx & 1) { 4141 SMLoc RegLoc = getRegLoc(Reg, Operands); 4142 Error(RegLoc, "vgpr must be even aligned"); 4143 return false; 4144 } 4145 4146 return true; 4147 } 4148 4149 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4150 const OperandVector &Operands, 4151 const SMLoc &IDLoc) { 4152 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4153 AMDGPU::OpName::cpol); 4154 if (CPolPos == -1) 4155 return true; 4156 4157 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4158 4159 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4160 if ((TSFlags & (SIInstrFlags::SMRD)) && 4161 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4162 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4163 return false; 4164 } 4165 4166 if (isGFX90A() && (CPol & CPol::SCC)) { 4167 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4168 StringRef CStr(S.getPointer()); 4169 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4170 Error(S, "scc is not supported on this GPU"); 4171 return false; 4172 } 4173 4174 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4175 return true; 4176 4177 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4178 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4179 Error(IDLoc, "instruction must use glc"); 4180 return false; 4181 } 4182 } else { 4183 if (CPol & CPol::GLC) { 4184 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4185 StringRef CStr(S.getPointer()); 4186 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4187 Error(S, "instruction must not use glc"); 4188 return false; 4189 } 4190 } 4191 4192 return true; 4193 } 4194 4195 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4196 const SMLoc &IDLoc, 4197 const OperandVector &Operands) { 4198 if (auto ErrMsg = validateLdsDirect(Inst)) { 4199 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4200 return false; 4201 } 4202 if (!validateSOPLiteral(Inst)) { 4203 Error(getLitLoc(Operands), 4204 "only one literal operand is allowed"); 4205 return false; 4206 } 4207 if (!validateVOP3Literal(Inst, Operands)) { 4208 return false; 4209 } 4210 if (!validateConstantBusLimitations(Inst, Operands)) { 4211 return false; 4212 } 4213 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4214 return false; 4215 } 4216 if (!validateIntClampSupported(Inst)) { 4217 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4218 "integer clamping is not supported on this GPU"); 4219 return false; 4220 } 4221 if (!validateOpSel(Inst)) { 4222 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4223 "invalid op_sel operand"); 4224 return false; 4225 } 4226 if (!validateDPP(Inst, Operands)) { 4227 return false; 4228 } 4229 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4230 if (!validateMIMGD16(Inst)) { 4231 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4232 "d16 modifier is not supported on this GPU"); 4233 return false; 4234 } 4235 if (!validateMIMGDim(Inst)) { 4236 Error(IDLoc, "dim modifier is required on this GPU"); 4237 return false; 4238 } 4239 if (!validateMIMGMSAA(Inst)) { 4240 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4241 "invalid dim; must be MSAA type"); 4242 return false; 4243 } 4244 if (!validateMIMGDataSize(Inst)) { 4245 Error(IDLoc, 4246 "image data size does not match dmask and tfe"); 4247 return false; 4248 } 4249 if (!validateMIMGAddrSize(Inst)) { 4250 Error(IDLoc, 4251 "image address size does not match dim and a16"); 4252 return false; 4253 } 4254 if (!validateMIMGAtomicDMask(Inst)) { 4255 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4256 "invalid atomic image dmask"); 4257 return false; 4258 } 4259 if (!validateMIMGGatherDMask(Inst)) { 4260 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4261 "invalid image_gather dmask: only one bit must be set"); 4262 return false; 4263 } 4264 if (!validateMovrels(Inst, Operands)) { 4265 return false; 4266 } 4267 if (!validateFlatOffset(Inst, Operands)) { 4268 return false; 4269 } 4270 if (!validateSMEMOffset(Inst, Operands)) { 4271 return false; 4272 } 4273 if (!validateMAIAccWrite(Inst, Operands)) { 4274 return false; 4275 } 4276 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4277 return false; 4278 } 4279 4280 if (!validateAGPRLdSt(Inst)) { 4281 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4282 ? "invalid register class: data and dst should be all VGPR or AGPR" 4283 : "invalid register class: agpr loads and stores not supported on this GPU" 4284 ); 4285 return false; 4286 } 4287 if (!validateVGPRAlign(Inst)) { 4288 Error(IDLoc, 4289 "invalid register class: vgpr tuples must be 64 bit aligned"); 4290 return false; 4291 } 4292 if (!validateGWS(Inst, Operands)) { 4293 return false; 4294 } 4295 4296 if (!validateDivScale(Inst)) { 4297 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4298 return false; 4299 } 4300 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4301 return false; 4302 } 4303 4304 return true; 4305 } 4306 4307 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4308 const FeatureBitset &FBS, 4309 unsigned VariantID = 0); 4310 4311 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4312 const FeatureBitset &AvailableFeatures, 4313 unsigned VariantID); 4314 4315 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4316 const FeatureBitset &FBS) { 4317 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4318 } 4319 4320 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4321 const FeatureBitset &FBS, 4322 ArrayRef<unsigned> Variants) { 4323 for (auto Variant : Variants) { 4324 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4325 return true; 4326 } 4327 4328 return false; 4329 } 4330 4331 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4332 const SMLoc &IDLoc) { 4333 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4334 4335 // Check if requested instruction variant is supported. 4336 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4337 return false; 4338 4339 // This instruction is not supported. 4340 // Clear any other pending errors because they are no longer relevant. 4341 getParser().clearPendingErrors(); 4342 4343 // Requested instruction variant is not supported. 4344 // Check if any other variants are supported. 
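  // For example, if an _sdwa form was written but this mnemonic only exists
  // in other encodings on the current target, prefer the variant-specific
  // diagnostic below over the generic "not supported on this GPU" message.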
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // Match statuses are ordered from least to most specific; keep the most
    // specific status seen so far as the result:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail && Result != Match_InvalidOperand
                                 && Result != Match_MissingFeature
                                 && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
4411 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4412 4413 case Match_InvalidOperand: { 4414 SMLoc ErrorLoc = IDLoc; 4415 if (ErrorInfo != ~0ULL) { 4416 if (ErrorInfo >= Operands.size()) { 4417 return Error(IDLoc, "too few operands for instruction"); 4418 } 4419 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4420 if (ErrorLoc == SMLoc()) 4421 ErrorLoc = IDLoc; 4422 } 4423 return Error(ErrorLoc, "invalid operand for instruction"); 4424 } 4425 4426 case Match_PreferE32: 4427 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4428 "should be encoded as e32"); 4429 case Match_MnemonicFail: 4430 llvm_unreachable("Invalid instructions should have been handled already"); 4431 } 4432 llvm_unreachable("Implement any new match types added!"); 4433 } 4434 4435 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4436 int64_t Tmp = -1; 4437 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4438 return true; 4439 } 4440 if (getParser().parseAbsoluteExpression(Tmp)) { 4441 return true; 4442 } 4443 Ret = static_cast<uint32_t>(Tmp); 4444 return false; 4445 } 4446 4447 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4448 uint32_t &Minor) { 4449 if (ParseAsAbsoluteExpression(Major)) 4450 return TokError("invalid major version"); 4451 4452 if (!trySkipToken(AsmToken::Comma)) 4453 return TokError("minor version number required, comma expected"); 4454 4455 if (ParseAsAbsoluteExpression(Minor)) 4456 return TokError("invalid minor version"); 4457 4458 return false; 4459 } 4460 4461 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4462 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4463 return TokError("directive only supported for amdgcn architecture"); 4464 4465 std::string TargetIDDirective; 4466 SMLoc TargetStart = getTok().getLoc(); 4467 if (getParser().parseEscapedString(TargetIDDirective)) 4468 return true; 4469 4470 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4471 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4472 return getParser().Error(TargetRange.Start, 4473 (Twine(".amdgcn_target directive's target id ") + 4474 Twine(TargetIDDirective) + 4475 Twine(" does not match the specified target id ") + 4476 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4477 4478 return false; 4479 } 4480 4481 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4482 return Error(Range.Start, "value out of range", Range); 4483 } 4484 4485 bool AMDGPUAsmParser::calculateGPRBlocks( 4486 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4487 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4488 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4489 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4490 // TODO(scott.linder): These calculations are duplicated from 4491 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
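  // Rough sketch of the granulation done below (actual granule sizes come
  // from IsaInfo::getNumVGPRBlocks/getNumSGPRBlocks): with a VGPR allocation
  // granule of 4, a kernel using 37 VGPRs encodes ceil(37 / 4) - 1 = 9
  // granulated blocks.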
4492 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4493 4494 unsigned NumVGPRs = NextFreeVGPR; 4495 unsigned NumSGPRs = NextFreeSGPR; 4496 4497 if (Version.Major >= 10) 4498 NumSGPRs = 0; 4499 else { 4500 unsigned MaxAddressableNumSGPRs = 4501 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4502 4503 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4504 NumSGPRs > MaxAddressableNumSGPRs) 4505 return OutOfRangeError(SGPRRange); 4506 4507 NumSGPRs += 4508 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4509 4510 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4511 NumSGPRs > MaxAddressableNumSGPRs) 4512 return OutOfRangeError(SGPRRange); 4513 4514 if (Features.test(FeatureSGPRInitBug)) 4515 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4516 } 4517 4518 VGPRBlocks = 4519 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4520 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4521 4522 return false; 4523 } 4524 4525 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4526 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4527 return TokError("directive only supported for amdgcn architecture"); 4528 4529 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4530 return TokError("directive only supported for amdhsa OS"); 4531 4532 StringRef KernelName; 4533 if (getParser().parseIdentifier(KernelName)) 4534 return true; 4535 4536 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4537 4538 StringSet<> Seen; 4539 4540 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4541 4542 SMRange VGPRRange; 4543 uint64_t NextFreeVGPR = 0; 4544 uint64_t AccumOffset = 0; 4545 SMRange SGPRRange; 4546 uint64_t NextFreeSGPR = 0; 4547 unsigned UserSGPRCount = 0; 4548 bool ReserveVCC = true; 4549 bool ReserveFlatScr = true; 4550 Optional<bool> EnableWavefrontSize32; 4551 4552 while (true) { 4553 while (trySkipToken(AsmToken::EndOfStatement)); 4554 4555 StringRef ID; 4556 SMRange IDRange = getTok().getLocRange(); 4557 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4558 return true; 4559 4560 if (ID == ".end_amdhsa_kernel") 4561 break; 4562 4563 if (Seen.find(ID) != Seen.end()) 4564 return TokError(".amdhsa_ directives cannot be repeated"); 4565 Seen.insert(ID); 4566 4567 SMLoc ValStart = getLoc(); 4568 int64_t IVal; 4569 if (getParser().parseAbsoluteExpression(IVal)) 4570 return true; 4571 SMLoc ValEnd = getLoc(); 4572 SMRange ValRange = SMRange(ValStart, ValEnd); 4573 4574 if (IVal < 0) 4575 return OutOfRangeError(ValRange); 4576 4577 uint64_t Val = IVal; 4578 4579 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4580 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4581 return OutOfRangeError(RANGE); \ 4582 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4583 4584 if (ID == ".amdhsa_group_segment_fixed_size") { 4585 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4586 return OutOfRangeError(ValRange); 4587 KD.group_segment_fixed_size = Val; 4588 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4589 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4590 return OutOfRangeError(ValRange); 4591 KD.private_segment_fixed_size = Val; 4592 } else if (ID == ".amdhsa_kernarg_size") { 4593 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4594 return OutOfRangeError(ValRange); 4595 KD.kernarg_size = Val; 4596 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4597 if (hasArchitectedFlatScratch()) 4598 return 
Error(IDRange.Start, 4599 "directive is not supported with architected flat scratch", 4600 IDRange); 4601 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4602 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4603 Val, ValRange); 4604 if (Val) 4605 UserSGPRCount += 4; 4606 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4607 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4608 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4609 ValRange); 4610 if (Val) 4611 UserSGPRCount += 2; 4612 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4613 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4614 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4615 ValRange); 4616 if (Val) 4617 UserSGPRCount += 2; 4618 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4619 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4620 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4621 Val, ValRange); 4622 if (Val) 4623 UserSGPRCount += 2; 4624 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4625 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4626 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4627 ValRange); 4628 if (Val) 4629 UserSGPRCount += 2; 4630 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4631 if (hasArchitectedFlatScratch()) 4632 return Error(IDRange.Start, 4633 "directive is not supported with architected flat scratch", 4634 IDRange); 4635 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4636 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4637 ValRange); 4638 if (Val) 4639 UserSGPRCount += 2; 4640 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4641 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4643 Val, ValRange); 4644 if (Val) 4645 UserSGPRCount += 1; 4646 } else if (ID == ".amdhsa_wavefront_size32") { 4647 if (IVersion.Major < 10) 4648 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4649 EnableWavefrontSize32 = Val; 4650 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4651 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4652 Val, ValRange); 4653 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4654 if (hasArchitectedFlatScratch()) 4655 return Error(IDRange.Start, 4656 "directive is not supported with architected flat scratch", 4657 IDRange); 4658 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4659 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4660 } else if (ID == ".amdhsa_enable_private_segment") { 4661 if (!hasArchitectedFlatScratch()) 4662 return Error( 4663 IDRange.Start, 4664 "directive is not supported without architected flat scratch", 4665 IDRange); 4666 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4667 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4668 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4669 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4670 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4671 ValRange); 4672 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4673 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4674 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4675 ValRange); 4676 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4677 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4678 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4679 ValRange); 4680 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4681 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4682 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4683 ValRange); 4684 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4685 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4686 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4687 ValRange); 4688 } else if (ID == ".amdhsa_next_free_vgpr") { 4689 VGPRRange = ValRange; 4690 NextFreeVGPR = Val; 4691 } else if (ID == ".amdhsa_next_free_sgpr") { 4692 SGPRRange = ValRange; 4693 NextFreeSGPR = Val; 4694 } else if (ID == ".amdhsa_accum_offset") { 4695 if (!isGFX90A()) 4696 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4697 AccumOffset = Val; 4698 } else if (ID == ".amdhsa_reserve_vcc") { 4699 if (!isUInt<1>(Val)) 4700 return OutOfRangeError(ValRange); 4701 ReserveVCC = Val; 4702 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4703 if (IVersion.Major < 7) 4704 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4705 if (hasArchitectedFlatScratch()) 4706 return Error(IDRange.Start, 4707 "directive is not supported with architected flat scratch", 4708 IDRange); 4709 if (!isUInt<1>(Val)) 4710 return OutOfRangeError(ValRange); 4711 ReserveFlatScr = Val; 4712 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4713 if (IVersion.Major < 8) 4714 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4715 if (!isUInt<1>(Val)) 4716 return OutOfRangeError(ValRange); 4717 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4718 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4719 IDRange); 4720 } else if (ID == ".amdhsa_float_round_mode_32") { 4721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4722 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4723 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4724 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4725 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4726 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4728 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4729 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4730 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4731 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4732 ValRange); 4733 } else if (ID == ".amdhsa_dx10_clamp") { 4734 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4735 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4736 } else if (ID == ".amdhsa_ieee_mode") { 4737 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4738 Val, ValRange); 4739 } else if (ID == ".amdhsa_fp16_overflow") { 4740 if (IVersion.Major < 9) 4741 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4742 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4743 ValRange); 4744 } else if (ID == ".amdhsa_tg_split") { 4745 if (!isGFX90A()) 4746 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4748 ValRange); 4749 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4750 if (IVersion.Major < 10) 4751 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4752 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4753 ValRange); 4754 } else if (ID == ".amdhsa_memory_ordered") { 4755 if (IVersion.Major < 10) 4756 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4758 ValRange); 4759 } else if (ID == ".amdhsa_forward_progress") { 4760 if (IVersion.Major < 10) 4761 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4763 ValRange); 4764 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4765 PARSE_BITS_ENTRY( 4766 KD.compute_pgm_rsrc2, 4767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4768 ValRange); 4769 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4772 Val, ValRange); 4773 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4774 PARSE_BITS_ENTRY( 4775 KD.compute_pgm_rsrc2, 4776 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4777 ValRange); 4778 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4779 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4780 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4781 Val, ValRange); 4782 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4783 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4784 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4785 Val, ValRange); 4786 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4787 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4788 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4789 Val, ValRange); 4790 } else if (ID == ".amdhsa_exception_int_div_zero") { 4791 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4792 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4793 Val, ValRange); 4794 } else { 4795 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4796 } 4797 4798 #undef PARSE_BITS_ENTRY 4799 } 4800 4801 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4802 return TokError(".amdhsa_next_free_vgpr directive is required"); 4803 4804 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4805 return TokError(".amdhsa_next_free_sgpr directive is required"); 4806 4807 unsigned VGPRBlocks; 4808 unsigned SGPRBlocks; 4809 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4810 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4811 EnableWavefrontSize32, NextFreeVGPR, 4812 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4813 SGPRBlocks)) 4814 return true; 4815 4816 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4817 VGPRBlocks)) 4818 return OutOfRangeError(VGPRRange); 4819 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4820 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4821 4822 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4823 SGPRBlocks)) 4824 return OutOfRangeError(SGPRRange); 4825 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4826 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4827 SGPRBlocks); 4828 4829 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4830 return TokError("too many user SGPRs enabled"); 4831 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4832 UserSGPRCount); 4833 4834 if (isGFX90A()) { 4835 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4836 return TokError(".amdhsa_accum_offset directive is required"); 4837 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4838 return TokError("accum_offset should be in range [4..256] in " 4839 "increments of 4"); 4840 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4841 return TokError("accum_offset exceeds total VGPR allocation"); 4842 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4843 (AccumOffset / 4 - 1)); 4844 } 4845 4846 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4847 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, 
ReserveVCC, 4848 ReserveFlatScr); 4849 return false; 4850 } 4851 4852 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4853 uint32_t Major; 4854 uint32_t Minor; 4855 4856 if (ParseDirectiveMajorMinor(Major, Minor)) 4857 return true; 4858 4859 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4860 return false; 4861 } 4862 4863 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4864 uint32_t Major; 4865 uint32_t Minor; 4866 uint32_t Stepping; 4867 StringRef VendorName; 4868 StringRef ArchName; 4869 4870 // If this directive has no arguments, then use the ISA version for the 4871 // targeted GPU. 4872 if (isToken(AsmToken::EndOfStatement)) { 4873 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4874 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4875 ISA.Stepping, 4876 "AMD", "AMDGPU"); 4877 return false; 4878 } 4879 4880 if (ParseDirectiveMajorMinor(Major, Minor)) 4881 return true; 4882 4883 if (!trySkipToken(AsmToken::Comma)) 4884 return TokError("stepping version number required, comma expected"); 4885 4886 if (ParseAsAbsoluteExpression(Stepping)) 4887 return TokError("invalid stepping version"); 4888 4889 if (!trySkipToken(AsmToken::Comma)) 4890 return TokError("vendor name required, comma expected"); 4891 4892 if (!parseString(VendorName, "invalid vendor name")) 4893 return true; 4894 4895 if (!trySkipToken(AsmToken::Comma)) 4896 return TokError("arch name required, comma expected"); 4897 4898 if (!parseString(ArchName, "invalid arch name")) 4899 return true; 4900 4901 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4902 VendorName, ArchName); 4903 return false; 4904 } 4905 4906 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4907 amd_kernel_code_t &Header) { 4908 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4909 // assembly for backwards compatibility. 
4910 if (ID == "max_scratch_backing_memory_byte_size") { 4911 Parser.eatToEndOfStatement(); 4912 return false; 4913 } 4914 4915 SmallString<40> ErrStr; 4916 raw_svector_ostream Err(ErrStr); 4917 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4918 return TokError(Err.str()); 4919 } 4920 Lex(); 4921 4922 if (ID == "enable_wavefront_size32") { 4923 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4924 if (!isGFX10Plus()) 4925 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4926 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4927 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4928 } else { 4929 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4930 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4931 } 4932 } 4933 4934 if (ID == "wavefront_size") { 4935 if (Header.wavefront_size == 5) { 4936 if (!isGFX10Plus()) 4937 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4938 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4939 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4940 } else if (Header.wavefront_size == 6) { 4941 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4942 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4943 } 4944 } 4945 4946 if (ID == "enable_wgp_mode") { 4947 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4948 !isGFX10Plus()) 4949 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4950 } 4951 4952 if (ID == "enable_mem_ordered") { 4953 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4954 !isGFX10Plus()) 4955 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4956 } 4957 4958 if (ID == "enable_fwd_progress") { 4959 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4960 !isGFX10Plus()) 4961 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4962 } 4963 4964 return false; 4965 } 4966 4967 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4968 amd_kernel_code_t Header; 4969 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4970 4971 while (true) { 4972 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4973 // will set the current token to EndOfStatement. 
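    // A typical block being parsed here looks like (illustrative):
    //   .amd_kernel_code_t
    //     wavefront_size = 6
    //     enable_wgp_mode = 0
    //   .end_amd_kernel_code_t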
4974 while(trySkipToken(AsmToken::EndOfStatement)); 4975 4976 StringRef ID; 4977 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4978 return true; 4979 4980 if (ID == ".end_amd_kernel_code_t") 4981 break; 4982 4983 if (ParseAMDKernelCodeTValue(ID, Header)) 4984 return true; 4985 } 4986 4987 getTargetStreamer().EmitAMDKernelCodeT(Header); 4988 4989 return false; 4990 } 4991 4992 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4993 StringRef KernelName; 4994 if (!parseId(KernelName, "expected symbol name")) 4995 return true; 4996 4997 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4998 ELF::STT_AMDGPU_HSA_KERNEL); 4999 5000 KernelScope.initialize(getContext()); 5001 return false; 5002 } 5003 5004 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5005 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5006 return Error(getLoc(), 5007 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5008 "architectures"); 5009 } 5010 5011 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5012 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5013 return Error(getParser().getTok().getLoc(), "target id must match options"); 5014 5015 getTargetStreamer().EmitISAVersion(); 5016 Lex(); 5017 5018 return false; 5019 } 5020 5021 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5022 const char *AssemblerDirectiveBegin; 5023 const char *AssemblerDirectiveEnd; 5024 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5025 isHsaAbiVersion3Or4(&getSTI()) 5026 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5027 HSAMD::V3::AssemblerDirectiveEnd) 5028 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5029 HSAMD::AssemblerDirectiveEnd); 5030 5031 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5032 return Error(getLoc(), 5033 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5034 "not available on non-amdhsa OSes")).str()); 5035 } 5036 5037 std::string HSAMetadataString; 5038 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5039 HSAMetadataString)) 5040 return true; 5041 5042 if (isHsaAbiVersion3Or4(&getSTI())) { 5043 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5044 return Error(getLoc(), "invalid HSA metadata"); 5045 } else { 5046 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5047 return Error(getLoc(), "invalid HSA metadata"); 5048 } 5049 5050 return false; 5051 } 5052 5053 /// Common code to parse out a block of text (typically YAML) between start and 5054 /// end directives. 
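/// For example (illustrative), a metadata payload arrives as
///   .amdgpu_metadata
///     ...YAML text...
///   .end_amdgpu_metadata
/// and everything between the two directives is accumulated into
/// CollectString.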
5055 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5056 const char *AssemblerDirectiveEnd, 5057 std::string &CollectString) { 5058 5059 raw_string_ostream CollectStream(CollectString); 5060 5061 getLexer().setSkipSpace(false); 5062 5063 bool FoundEnd = false; 5064 while (!isToken(AsmToken::Eof)) { 5065 while (isToken(AsmToken::Space)) { 5066 CollectStream << getTokenStr(); 5067 Lex(); 5068 } 5069 5070 if (trySkipId(AssemblerDirectiveEnd)) { 5071 FoundEnd = true; 5072 break; 5073 } 5074 5075 CollectStream << Parser.parseStringToEndOfStatement() 5076 << getContext().getAsmInfo()->getSeparatorString(); 5077 5078 Parser.eatToEndOfStatement(); 5079 } 5080 5081 getLexer().setSkipSpace(true); 5082 5083 if (isToken(AsmToken::Eof) && !FoundEnd) { 5084 return TokError(Twine("expected directive ") + 5085 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5086 } 5087 5088 CollectStream.flush(); 5089 return false; 5090 } 5091 5092 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5093 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5094 std::string String; 5095 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5096 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5097 return true; 5098 5099 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5100 if (!PALMetadata->setFromString(String)) 5101 return Error(getLoc(), "invalid PAL metadata"); 5102 return false; 5103 } 5104 5105 /// Parse the assembler directive for old linear-format PAL metadata. 5106 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5107 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5108 return Error(getLoc(), 5109 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5110 "not available on non-amdpal OSes")).str()); 5111 } 5112 5113 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5114 PALMetadata->setLegacy(); 5115 for (;;) { 5116 uint32_t Key, Value; 5117 if (ParseAsAbsoluteExpression(Key)) { 5118 return TokError(Twine("invalid value in ") + 5119 Twine(PALMD::AssemblerDirective)); 5120 } 5121 if (!trySkipToken(AsmToken::Comma)) { 5122 return TokError(Twine("expected an even number of values in ") + 5123 Twine(PALMD::AssemblerDirective)); 5124 } 5125 if (ParseAsAbsoluteExpression(Value)) { 5126 return TokError(Twine("invalid value in ") + 5127 Twine(PALMD::AssemblerDirective)); 5128 } 5129 PALMetadata->setRegister(Key, Value); 5130 if (!trySkipToken(AsmToken::Comma)) 5131 break; 5132 } 5133 return false; 5134 } 5135 5136 /// ParseDirectiveAMDGPULDS 5137 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5138 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5139 if (getParser().checkForValidSection()) 5140 return true; 5141 5142 StringRef Name; 5143 SMLoc NameLoc = getLoc(); 5144 if (getParser().parseIdentifier(Name)) 5145 return TokError("expected identifier in directive"); 5146 5147 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5148 if (parseToken(AsmToken::Comma, "expected ','")) 5149 return true; 5150 5151 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5152 5153 int64_t Size; 5154 SMLoc SizeLoc = getLoc(); 5155 if (getParser().parseAbsoluteExpression(Size)) 5156 return true; 5157 if (Size < 0) 5158 return Error(SizeLoc, "size must be non-negative"); 5159 if (Size > LocalMemorySize) 5160 return Error(SizeLoc, "size is too large"); 5161 5162 int64_t Alignment = 4; 5163 if (trySkipToken(AsmToken::Comma)) { 5164 SMLoc AlignLoc = getLoc(); 5165 if 
(getParser().parseAbsoluteExpression(Alignment)) 5166 return true; 5167 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5168 return Error(AlignLoc, "alignment must be a power of two"); 5169 5170 // Alignment larger than the size of LDS is possible in theory, as long 5171 // as the linker manages to place to symbol at address 0, but we do want 5172 // to make sure the alignment fits nicely into a 32-bit integer. 5173 if (Alignment >= 1u << 31) 5174 return Error(AlignLoc, "alignment is too large"); 5175 } 5176 5177 if (parseToken(AsmToken::EndOfStatement, 5178 "unexpected token in '.amdgpu_lds' directive")) 5179 return true; 5180 5181 Symbol->redefineIfPossible(); 5182 if (!Symbol->isUndefined()) 5183 return Error(NameLoc, "invalid symbol redefinition"); 5184 5185 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5186 return false; 5187 } 5188 5189 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5190 StringRef IDVal = DirectiveID.getString(); 5191 5192 if (isHsaAbiVersion3Or4(&getSTI())) { 5193 if (IDVal == ".amdhsa_kernel") 5194 return ParseDirectiveAMDHSAKernel(); 5195 5196 // TODO: Restructure/combine with PAL metadata directive. 5197 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5198 return ParseDirectiveHSAMetadata(); 5199 } else { 5200 if (IDVal == ".hsa_code_object_version") 5201 return ParseDirectiveHSACodeObjectVersion(); 5202 5203 if (IDVal == ".hsa_code_object_isa") 5204 return ParseDirectiveHSACodeObjectISA(); 5205 5206 if (IDVal == ".amd_kernel_code_t") 5207 return ParseDirectiveAMDKernelCodeT(); 5208 5209 if (IDVal == ".amdgpu_hsa_kernel") 5210 return ParseDirectiveAMDGPUHsaKernel(); 5211 5212 if (IDVal == ".amd_amdgpu_isa") 5213 return ParseDirectiveISAVersion(); 5214 5215 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5216 return ParseDirectiveHSAMetadata(); 5217 } 5218 5219 if (IDVal == ".amdgcn_target") 5220 return ParseDirectiveAMDGCNTarget(); 5221 5222 if (IDVal == ".amdgpu_lds") 5223 return ParseDirectiveAMDGPULDS(); 5224 5225 if (IDVal == PALMD::AssemblerDirectiveBegin) 5226 return ParseDirectivePALMetadataBegin(); 5227 5228 if (IDVal == PALMD::AssemblerDirective) 5229 return ParseDirectivePALMetadata(); 5230 5231 return true; 5232 } 5233 5234 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5235 unsigned RegNo) { 5236 5237 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5238 R.isValid(); ++R) { 5239 if (*R == RegNo) 5240 return isGFX9Plus(); 5241 } 5242 5243 // GFX10 has 2 more SGPRs 104 and 105. 5244 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5245 R.isValid(); ++R) { 5246 if (*R == RegNo) 5247 return hasSGPR104_SGPR105(); 5248 } 5249 5250 switch (RegNo) { 5251 case AMDGPU::SRC_SHARED_BASE: 5252 case AMDGPU::SRC_SHARED_LIMIT: 5253 case AMDGPU::SRC_PRIVATE_BASE: 5254 case AMDGPU::SRC_PRIVATE_LIMIT: 5255 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5256 return isGFX9Plus(); 5257 case AMDGPU::TBA: 5258 case AMDGPU::TBA_LO: 5259 case AMDGPU::TBA_HI: 5260 case AMDGPU::TMA: 5261 case AMDGPU::TMA_LO: 5262 case AMDGPU::TMA_HI: 5263 return !isGFX9Plus(); 5264 case AMDGPU::XNACK_MASK: 5265 case AMDGPU::XNACK_MASK_LO: 5266 case AMDGPU::XNACK_MASK_HI: 5267 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5268 case AMDGPU::SGPR_NULL: 5269 return isGFX10Plus(); 5270 default: 5271 break; 5272 } 5273 5274 if (isCI()) 5275 return true; 5276 5277 if (isSI() || isGFX10Plus()) { 5278 // No flat_scr on SI. 
5279 // On GFX10 flat scratch is not a valid register operand and can only be 5280 // accessed with s_setreg/s_getreg. 5281 switch (RegNo) { 5282 case AMDGPU::FLAT_SCR: 5283 case AMDGPU::FLAT_SCR_LO: 5284 case AMDGPU::FLAT_SCR_HI: 5285 return false; 5286 default: 5287 return true; 5288 } 5289 } 5290 5291 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5292 // SI/CI have. 5293 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5294 R.isValid(); ++R) { 5295 if (*R == RegNo) 5296 return hasSGPR102_SGPR103(); 5297 } 5298 5299 return true; 5300 } 5301 5302 OperandMatchResultTy 5303 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5304 OperandMode Mode) { 5305 // Try to parse with a custom parser 5306 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5307 5308 // If we successfully parsed the operand or if there as an error parsing, 5309 // we are done. 5310 // 5311 // If we are parsing after we reach EndOfStatement then this means we 5312 // are appending default values to the Operands list. This is only done 5313 // by custom parser, so we shouldn't continue on to the generic parsing. 5314 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5315 isToken(AsmToken::EndOfStatement)) 5316 return ResTy; 5317 5318 SMLoc RBraceLoc; 5319 SMLoc LBraceLoc = getLoc(); 5320 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5321 unsigned Prefix = Operands.size(); 5322 5323 for (;;) { 5324 auto Loc = getLoc(); 5325 ResTy = parseReg(Operands); 5326 if (ResTy == MatchOperand_NoMatch) 5327 Error(Loc, "expected a register"); 5328 if (ResTy != MatchOperand_Success) 5329 return MatchOperand_ParseFail; 5330 5331 RBraceLoc = getLoc(); 5332 if (trySkipToken(AsmToken::RBrac)) 5333 break; 5334 5335 if (!skipToken(AsmToken::Comma, 5336 "expected a comma or a closing square bracket")) { 5337 return MatchOperand_ParseFail; 5338 } 5339 } 5340 5341 if (Operands.size() - Prefix > 1) { 5342 Operands.insert(Operands.begin() + Prefix, 5343 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5344 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5345 } 5346 5347 return MatchOperand_Success; 5348 } 5349 5350 return parseRegOrImm(Operands); 5351 } 5352 5353 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5354 // Clear any forced encodings from the previous instruction. 
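  // A trailing suffix on the mnemonic selects a particular encoding and is
  // stripped before matching. For example (illustrative), "v_add_f32_e64"
  // forces the 64-bit (VOP3) encoding and is matched as "v_add_f32", while
  // "_dpp" and "_sdwa" select the DPP and SDWA variants below.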
5355 setForcedEncodingSize(0); 5356 setForcedDPP(false); 5357 setForcedSDWA(false); 5358 5359 if (Name.endswith("_e64")) { 5360 setForcedEncodingSize(64); 5361 return Name.substr(0, Name.size() - 4); 5362 } else if (Name.endswith("_e32")) { 5363 setForcedEncodingSize(32); 5364 return Name.substr(0, Name.size() - 4); 5365 } else if (Name.endswith("_dpp")) { 5366 setForcedDPP(true); 5367 return Name.substr(0, Name.size() - 4); 5368 } else if (Name.endswith("_sdwa")) { 5369 setForcedSDWA(true); 5370 return Name.substr(0, Name.size() - 5); 5371 } 5372 return Name; 5373 } 5374 5375 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5376 StringRef Name, 5377 SMLoc NameLoc, OperandVector &Operands) { 5378 // Add the instruction mnemonic 5379 Name = parseMnemonicSuffix(Name); 5380 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5381 5382 bool IsMIMG = Name.startswith("image_"); 5383 5384 while (!trySkipToken(AsmToken::EndOfStatement)) { 5385 OperandMode Mode = OperandMode_Default; 5386 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5387 Mode = OperandMode_NSA; 5388 CPolSeen = 0; 5389 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5390 5391 if (Res != MatchOperand_Success) { 5392 checkUnsupportedInstruction(Name, NameLoc); 5393 if (!Parser.hasPendingError()) { 5394 // FIXME: use real operand location rather than the current location. 5395 StringRef Msg = 5396 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5397 "not a valid operand."; 5398 Error(getLoc(), Msg); 5399 } 5400 while (!trySkipToken(AsmToken::EndOfStatement)) { 5401 lex(); 5402 } 5403 return true; 5404 } 5405 5406 // Eat the comma or space if there is one. 5407 trySkipToken(AsmToken::Comma); 5408 } 5409 5410 return false; 5411 } 5412 5413 //===----------------------------------------------------------------------===// 5414 // Utility functions 5415 //===----------------------------------------------------------------------===// 5416 5417 OperandMatchResultTy 5418 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5419 5420 if (!trySkipId(Prefix, AsmToken::Colon)) 5421 return MatchOperand_NoMatch; 5422 5423 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5424 } 5425 5426 OperandMatchResultTy 5427 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5428 AMDGPUOperand::ImmTy ImmTy, 5429 bool (*ConvertResult)(int64_t&)) { 5430 SMLoc S = getLoc(); 5431 int64_t Value = 0; 5432 5433 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5434 if (Res != MatchOperand_Success) 5435 return Res; 5436 5437 if (ConvertResult && !ConvertResult(Value)) { 5438 Error(S, "invalid " + StringRef(Prefix) + " value."); 5439 } 5440 5441 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5442 return MatchOperand_Success; 5443 } 5444 5445 OperandMatchResultTy 5446 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5447 OperandVector &Operands, 5448 AMDGPUOperand::ImmTy ImmTy, 5449 bool (*ConvertResult)(int64_t&)) { 5450 SMLoc S = getLoc(); 5451 if (!trySkipId(Prefix, AsmToken::Colon)) 5452 return MatchOperand_NoMatch; 5453 5454 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5455 return MatchOperand_ParseFail; 5456 5457 unsigned Val = 0; 5458 const unsigned MaxSize = 4; 5459 5460 // FIXME: How to verify the number of elements matches the number of src 5461 // operands? 
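  // Parse a bracketed list of 0/1 values into a bitmask, one bit per element.
  // For example (illustrative), "op_sel:[0,1,1,0]" yields Val == 0b0110.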
5462 for (int I = 0; ; ++I) { 5463 int64_t Op; 5464 SMLoc Loc = getLoc(); 5465 if (!parseExpr(Op)) 5466 return MatchOperand_ParseFail; 5467 5468 if (Op != 0 && Op != 1) { 5469 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5470 return MatchOperand_ParseFail; 5471 } 5472 5473 Val |= (Op << I); 5474 5475 if (trySkipToken(AsmToken::RBrac)) 5476 break; 5477 5478 if (I + 1 == MaxSize) { 5479 Error(getLoc(), "expected a closing square bracket"); 5480 return MatchOperand_ParseFail; 5481 } 5482 5483 if (!skipToken(AsmToken::Comma, "expected a comma")) 5484 return MatchOperand_ParseFail; 5485 } 5486 5487 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5488 return MatchOperand_Success; 5489 } 5490 5491 OperandMatchResultTy 5492 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5493 AMDGPUOperand::ImmTy ImmTy) { 5494 int64_t Bit; 5495 SMLoc S = getLoc(); 5496 5497 if (trySkipId(Name)) { 5498 Bit = 1; 5499 } else if (trySkipId("no", Name)) { 5500 Bit = 0; 5501 } else { 5502 return MatchOperand_NoMatch; 5503 } 5504 5505 if (Name == "r128" && !hasMIMG_R128()) { 5506 Error(S, "r128 modifier is not supported on this GPU"); 5507 return MatchOperand_ParseFail; 5508 } 5509 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5510 Error(S, "a16 modifier is not supported on this GPU"); 5511 return MatchOperand_ParseFail; 5512 } 5513 5514 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5515 ImmTy = AMDGPUOperand::ImmTyR128A16; 5516 5517 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5518 return MatchOperand_Success; 5519 } 5520 5521 OperandMatchResultTy 5522 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5523 unsigned CPolOn = 0; 5524 unsigned CPolOff = 0; 5525 SMLoc S = getLoc(); 5526 5527 if (trySkipId("glc")) 5528 CPolOn = AMDGPU::CPol::GLC; 5529 else if (trySkipId("noglc")) 5530 CPolOff = AMDGPU::CPol::GLC; 5531 else if (trySkipId("slc")) 5532 CPolOn = AMDGPU::CPol::SLC; 5533 else if (trySkipId("noslc")) 5534 CPolOff = AMDGPU::CPol::SLC; 5535 else if (trySkipId("dlc")) 5536 CPolOn = AMDGPU::CPol::DLC; 5537 else if (trySkipId("nodlc")) 5538 CPolOff = AMDGPU::CPol::DLC; 5539 else if (trySkipId("scc")) 5540 CPolOn = AMDGPU::CPol::SCC; 5541 else if (trySkipId("noscc")) 5542 CPolOff = AMDGPU::CPol::SCC; 5543 else 5544 return MatchOperand_NoMatch; 5545 5546 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5547 Error(S, "dlc modifier is not supported on this GPU"); 5548 return MatchOperand_ParseFail; 5549 } 5550 5551 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5552 Error(S, "scc modifier is not supported on this GPU"); 5553 return MatchOperand_ParseFail; 5554 } 5555 5556 if (CPolSeen & (CPolOn | CPolOff)) { 5557 Error(S, "duplicate cache policy modifier"); 5558 return MatchOperand_ParseFail; 5559 } 5560 5561 CPolSeen |= (CPolOn | CPolOff); 5562 5563 for (unsigned I = 1; I != Operands.size(); ++I) { 5564 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5565 if (Op.isCPol()) { 5566 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5567 return MatchOperand_Success; 5568 } 5569 } 5570 5571 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5572 AMDGPUOperand::ImmTyCPol)); 5573 5574 return MatchOperand_Success; 5575 } 5576 5577 static void addOptionalImmOperand( 5578 MCInst& Inst, const OperandVector& Operands, 5579 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5580 AMDGPUOperand::ImmTy ImmT, 5581 int64_t Default = 0) { 5582 auto i = OptionalIdx.find(ImmT); 5583 if (i != OptionalIdx.end()) { 
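    // The operand was supplied in the source; emit the value that was parsed.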
5584 unsigned Idx = i->second; 5585 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5586 } else { 5587 Inst.addOperand(MCOperand::createImm(Default)); 5588 } 5589 } 5590 5591 OperandMatchResultTy 5592 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5593 StringRef &Value, 5594 SMLoc &StringLoc) { 5595 if (!trySkipId(Prefix, AsmToken::Colon)) 5596 return MatchOperand_NoMatch; 5597 5598 StringLoc = getLoc(); 5599 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5600 : MatchOperand_ParseFail; 5601 } 5602 5603 //===----------------------------------------------------------------------===// 5604 // MTBUF format 5605 //===----------------------------------------------------------------------===// 5606 5607 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5608 int64_t MaxVal, 5609 int64_t &Fmt) { 5610 int64_t Val; 5611 SMLoc Loc = getLoc(); 5612 5613 auto Res = parseIntWithPrefix(Pref, Val); 5614 if (Res == MatchOperand_ParseFail) 5615 return false; 5616 if (Res == MatchOperand_NoMatch) 5617 return true; 5618 5619 if (Val < 0 || Val > MaxVal) { 5620 Error(Loc, Twine("out of range ", StringRef(Pref))); 5621 return false; 5622 } 5623 5624 Fmt = Val; 5625 return true; 5626 } 5627 5628 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5629 // values to live in a joint format operand in the MCInst encoding. 5630 OperandMatchResultTy 5631 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5632 using namespace llvm::AMDGPU::MTBUFFormat; 5633 5634 int64_t Dfmt = DFMT_UNDEF; 5635 int64_t Nfmt = NFMT_UNDEF; 5636 5637 // dfmt and nfmt can appear in either order, and each is optional. 5638 for (int I = 0; I < 2; ++I) { 5639 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5640 return MatchOperand_ParseFail; 5641 5642 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5643 return MatchOperand_ParseFail; 5644 } 5645 // Skip optional comma between dfmt/nfmt 5646 // but guard against 2 commas following each other. 5647 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5648 !peekToken().is(AsmToken::Comma)) { 5649 trySkipToken(AsmToken::Comma); 5650 } 5651 } 5652 5653 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5654 return MatchOperand_NoMatch; 5655 5656 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5657 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5658 5659 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5660 return MatchOperand_Success; 5661 } 5662 5663 OperandMatchResultTy 5664 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5665 using namespace llvm::AMDGPU::MTBUFFormat; 5666 5667 int64_t Fmt = UFMT_UNDEF; 5668 5669 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5670 return MatchOperand_ParseFail; 5671 5672 if (Fmt == UFMT_UNDEF) 5673 return MatchOperand_NoMatch; 5674 5675 Format = Fmt; 5676 return MatchOperand_Success; 5677 } 5678 5679 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5680 int64_t &Nfmt, 5681 StringRef FormatStr, 5682 SMLoc Loc) { 5683 using namespace llvm::AMDGPU::MTBUFFormat; 5684 int64_t Format; 5685 5686 Format = getDfmt(FormatStr); 5687 if (Format != DFMT_UNDEF) { 5688 Dfmt = Format; 5689 return true; 5690 } 5691 5692 Format = getNfmt(FormatStr, getSTI()); 5693 if (Format != NFMT_UNDEF) { 5694 Nfmt = Format; 5695 return true; 5696 } 5697 5698 Error(Loc, "unsupported format"); 5699 return false; 5700 } 5701 5702 OperandMatchResultTy 5703 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5704 SMLoc FormatLoc, 5705 int64_t &Format) { 5706 using namespace llvm::AMDGPU::MTBUFFormat; 5707 5708 int64_t Dfmt = DFMT_UNDEF; 5709 int64_t Nfmt = NFMT_UNDEF; 5710 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5711 return MatchOperand_ParseFail; 5712 5713 if (trySkipToken(AsmToken::Comma)) { 5714 StringRef Str; 5715 SMLoc Loc = getLoc(); 5716 if (!parseId(Str, "expected a format string") || 5717 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5718 return MatchOperand_ParseFail; 5719 } 5720 if (Dfmt == DFMT_UNDEF) { 5721 Error(Loc, "duplicate numeric format"); 5722 return MatchOperand_ParseFail; 5723 } else if (Nfmt == NFMT_UNDEF) { 5724 Error(Loc, "duplicate data format"); 5725 return MatchOperand_ParseFail; 5726 } 5727 } 5728 5729 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5730 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5731 5732 if (isGFX10Plus()) { 5733 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5734 if (Ufmt == UFMT_UNDEF) { 5735 Error(FormatLoc, "unsupported format"); 5736 return MatchOperand_ParseFail; 5737 } 5738 Format = Ufmt; 5739 } else { 5740 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5741 } 5742 5743 return MatchOperand_Success; 5744 } 5745 5746 OperandMatchResultTy 5747 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5748 SMLoc Loc, 5749 int64_t &Format) { 5750 using namespace llvm::AMDGPU::MTBUFFormat; 5751 5752 auto Id = getUnifiedFormat(FormatStr); 5753 if (Id == UFMT_UNDEF) 5754 return MatchOperand_NoMatch; 5755 5756 if (!isGFX10Plus()) { 5757 Error(Loc, "unified format is not supported on this GPU"); 5758 return MatchOperand_ParseFail; 5759 } 5760 5761 Format = Id; 5762 return MatchOperand_Success; 5763 } 5764 5765 OperandMatchResultTy 5766 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5767 using namespace llvm::AMDGPU::MTBUFFormat; 5768 SMLoc Loc = getLoc(); 5769 5770 if (!parseExpr(Format)) 5771 return MatchOperand_ParseFail; 5772 if (!isValidFormatEncoding(Format, getSTI())) { 5773 Error(Loc, "out of range format"); 5774 return MatchOperand_ParseFail; 5775 } 5776 5777 return MatchOperand_Success; 5778 } 5779 5780 OperandMatchResultTy 5781 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5782 using namespace llvm::AMDGPU::MTBUFFormat; 5783 5784 if (!trySkipId("format", AsmToken::Colon)) 5785 return MatchOperand_NoMatch; 5786 5787 if (trySkipToken(AsmToken::LBrac)) { 5788 StringRef FormatStr; 5789 SMLoc Loc = getLoc(); 5790 if (!parseId(FormatStr, "expected a format string")) 5791 return MatchOperand_ParseFail; 5792 5793 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5794 if (Res == MatchOperand_NoMatch) 5795 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5796 if (Res != MatchOperand_Success) 5797 return Res; 5798 5799 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5800 return MatchOperand_ParseFail; 5801 5802 return MatchOperand_Success; 5803 } 5804 5805 return parseNumericFormat(Format); 5806 } 5807 5808 OperandMatchResultTy 5809 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5810 using namespace llvm::AMDGPU::MTBUFFormat; 5811 5812 int64_t Format = getDefaultFormatEncoding(getSTI()); 5813 OperandMatchResultTy Res; 5814 SMLoc Loc = getLoc(); 5815 5816 // Parse legacy format syntax. 5817 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5818 if (Res == MatchOperand_ParseFail) 5819 return Res; 5820 5821 bool FormatFound = (Res == MatchOperand_Success); 5822 5823 Operands.push_back( 5824 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5825 5826 if (FormatFound) 5827 trySkipToken(AsmToken::Comma); 5828 5829 if (isToken(AsmToken::EndOfStatement)) { 5830 // We are expecting an soffset operand, 5831 // but let matcher handle the error. 5832 return MatchOperand_Success; 5833 } 5834 5835 // Parse soffset. 
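  // The format modifier may also follow the soffset operand, e.g.
  // (illustrative) "... s4 format:[BUF_FMT_32_FLOAT]"; in that case soffset
  // is parsed first and the placeholder format operand pushed above is
  // patched once the trailing format is seen.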
5836 Res = parseRegOrImm(Operands); 5837 if (Res != MatchOperand_Success) 5838 return Res; 5839 5840 trySkipToken(AsmToken::Comma); 5841 5842 if (!FormatFound) { 5843 Res = parseSymbolicOrNumericFormat(Format); 5844 if (Res == MatchOperand_ParseFail) 5845 return Res; 5846 if (Res == MatchOperand_Success) { 5847 auto Size = Operands.size(); 5848 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5849 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5850 Op.setImm(Format); 5851 } 5852 return MatchOperand_Success; 5853 } 5854 5855 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5856 Error(getLoc(), "duplicate format"); 5857 return MatchOperand_ParseFail; 5858 } 5859 return MatchOperand_Success; 5860 } 5861 5862 //===----------------------------------------------------------------------===// 5863 // ds 5864 //===----------------------------------------------------------------------===// 5865 5866 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5867 const OperandVector &Operands) { 5868 OptionalImmIndexMap OptionalIdx; 5869 5870 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5871 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5872 5873 // Add the register arguments 5874 if (Op.isReg()) { 5875 Op.addRegOperands(Inst, 1); 5876 continue; 5877 } 5878 5879 // Handle optional arguments 5880 OptionalIdx[Op.getImmTy()] = i; 5881 } 5882 5883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5886 5887 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5888 } 5889 5890 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5891 bool IsGdsHardcoded) { 5892 OptionalImmIndexMap OptionalIdx; 5893 5894 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5895 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5896 5897 // Add the register arguments 5898 if (Op.isReg()) { 5899 Op.addRegOperands(Inst, 1); 5900 continue; 5901 } 5902 5903 if (Op.isToken() && Op.getToken() == "gds") { 5904 IsGdsHardcoded = true; 5905 continue; 5906 } 5907 5908 // Handle optional arguments 5909 OptionalIdx[Op.getImmTy()] = i; 5910 } 5911 5912 AMDGPUOperand::ImmTy OffsetType = 5913 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5914 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5915 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5916 AMDGPUOperand::ImmTyOffset; 5917 5918 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5919 5920 if (!IsGdsHardcoded) { 5921 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5922 } 5923 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5924 } 5925 5926 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5927 OptionalImmIndexMap OptionalIdx; 5928 5929 unsigned OperandIdx[4]; 5930 unsigned EnMask = 0; 5931 int SrcIdx = 0; 5932 5933 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5934 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5935 5936 // Add the register arguments 5937 if (Op.isReg()) { 5938 assert(SrcIdx < 4); 5939 OperandIdx[SrcIdx] = Inst.size(); 5940 Op.addRegOperands(Inst, 1); 5941 ++SrcIdx; 5942 continue; 5943 } 5944 5945 if (Op.isOff()) { 5946 assert(SrcIdx < 4); 5947 OperandIdx[SrcIdx] = Inst.size(); 5948 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5949 ++SrcIdx; 5950 continue; 5951 } 5952 5953 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5954 Op.addImmOperands(Inst, 1); 5955 continue; 5956 } 5957 5958 if (Op.isToken() && Op.getToken() == "done") 5959 continue; 5960 5961 // Handle optional arguments 5962 OptionalIdx[Op.getImmTy()] = i; 5963 } 5964 5965 assert(SrcIdx == 4); 5966 5967 bool Compr = false; 5968 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5969 Compr = true; 5970 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5971 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5972 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5973 } 5974 5975 for (auto i = 0; i < SrcIdx; ++i) { 5976 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5977 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5978 } 5979 } 5980 5981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5983 5984 Inst.addOperand(MCOperand::createImm(EnMask)); 5985 } 5986 5987 //===----------------------------------------------------------------------===// 5988 // s_waitcnt 5989 //===----------------------------------------------------------------------===// 5990 5991 static bool 5992 encodeCnt( 5993 const AMDGPU::IsaVersion ISA, 5994 int64_t &IntVal, 5995 int64_t CntVal, 5996 bool Saturate, 5997 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5998 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5999 { 6000 bool Failed = false; 6001 6002 IntVal = encode(ISA, IntVal, CntVal); 6003 if (CntVal != decode(ISA, IntVal)) { 6004 if (Saturate) { 6005 IntVal = encode(ISA, IntVal, -1); 6006 } else { 6007 Failed = true; 6008 } 6009 } 6010 return Failed; 6011 } 6012 6013 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6014 6015 SMLoc CntLoc = getLoc(); 6016 StringRef CntName = getTokenStr(); 6017 6018 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6019 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6020 return false; 6021 6022 int64_t CntVal; 6023 SMLoc ValLoc = getLoc(); 6024 if (!parseExpr(CntVal)) 6025 return false; 6026 6027 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6028 6029 bool Failed = true; 6030 bool Sat = CntName.endswith("_sat"); 6031 6032 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6033 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6034 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6035 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6036 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6037 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6038 } else { 6039 Error(CntLoc, "invalid counter name " + CntName); 6040 return false; 6041 } 6042 6043 if (Failed) { 6044 Error(ValLoc, "too large value for " + CntName); 6045 return false; 6046 } 6047 6048 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6049 return false; 6050 6051 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6052 if (isToken(AsmToken::EndOfStatement)) { 6053 Error(getLoc(), "expected a counter name"); 6054 return false; 6055 } 6056 } 6057 6058 return true; 6059 } 6060 6061 OperandMatchResultTy 6062 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6063 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6064 int64_t Waitcnt = getWaitcntBitMask(ISA); 6065 SMLoc S = getLoc(); 6066 6067 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6068 while (!isToken(AsmToken::EndOfStatement)) { 6069 if (!parseCnt(Waitcnt)) 6070 return MatchOperand_ParseFail; 6071 } 6072 } else { 6073 if (!parseExpr(Waitcnt)) 6074 return MatchOperand_ParseFail; 6075 } 6076 6077 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6078 return MatchOperand_Success; 6079 } 6080 6081 bool 6082 AMDGPUOperand::isSWaitCnt() const { 6083 return isImm(); 6084 } 6085 6086 //===----------------------------------------------------------------------===// 6087 // hwreg 6088 //===----------------------------------------------------------------------===// 6089 6090 bool 6091 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6092 OperandInfoTy &Offset, 6093 
OperandInfoTy &Width) { 6094 using namespace llvm::AMDGPU::Hwreg; 6095 6096 // The register may be specified by name or using a numeric code 6097 HwReg.Loc = getLoc(); 6098 if (isToken(AsmToken::Identifier) && 6099 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6100 HwReg.IsSymbolic = true; 6101 lex(); // skip register name 6102 } else if (!parseExpr(HwReg.Id, "a register name")) { 6103 return false; 6104 } 6105 6106 if (trySkipToken(AsmToken::RParen)) 6107 return true; 6108 6109 // parse optional params 6110 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6111 return false; 6112 6113 Offset.Loc = getLoc(); 6114 if (!parseExpr(Offset.Id)) 6115 return false; 6116 6117 if (!skipToken(AsmToken::Comma, "expected a comma")) 6118 return false; 6119 6120 Width.Loc = getLoc(); 6121 return parseExpr(Width.Id) && 6122 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6123 } 6124 6125 bool 6126 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6127 const OperandInfoTy &Offset, 6128 const OperandInfoTy &Width) { 6129 6130 using namespace llvm::AMDGPU::Hwreg; 6131 6132 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6133 Error(HwReg.Loc, 6134 "specified hardware register is not supported on this GPU"); 6135 return false; 6136 } 6137 if (!isValidHwreg(HwReg.Id)) { 6138 Error(HwReg.Loc, 6139 "invalid code of hardware register: only 6-bit values are legal"); 6140 return false; 6141 } 6142 if (!isValidHwregOffset(Offset.Id)) { 6143 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6144 return false; 6145 } 6146 if (!isValidHwregWidth(Width.Id)) { 6147 Error(Width.Loc, 6148 "invalid bitfield width: only values from 1 to 32 are legal"); 6149 return false; 6150 } 6151 return true; 6152 } 6153 6154 OperandMatchResultTy 6155 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6156 using namespace llvm::AMDGPU::Hwreg; 6157 6158 int64_t ImmVal = 0; 6159 SMLoc Loc = getLoc(); 6160 6161 if (trySkipId("hwreg", AsmToken::LParen)) { 6162 OperandInfoTy HwReg(ID_UNKNOWN_); 6163 OperandInfoTy Offset(OFFSET_DEFAULT_); 6164 OperandInfoTy Width(WIDTH_DEFAULT_); 6165 if (parseHwregBody(HwReg, Offset, Width) && 6166 validateHwreg(HwReg, Offset, Width)) { 6167 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6168 } else { 6169 return MatchOperand_ParseFail; 6170 } 6171 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6172 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6173 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6174 return MatchOperand_ParseFail; 6175 } 6176 } else { 6177 return MatchOperand_ParseFail; 6178 } 6179 6180 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6181 return MatchOperand_Success; 6182 } 6183 6184 bool AMDGPUOperand::isHwreg() const { 6185 return isImmTy(ImmTyHwreg); 6186 } 6187 6188 //===----------------------------------------------------------------------===// 6189 // sendmsg 6190 //===----------------------------------------------------------------------===// 6191 6192 bool 6193 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6194 OperandInfoTy &Op, 6195 OperandInfoTy &Stream) { 6196 using namespace llvm::AMDGPU::SendMsg; 6197 6198 Msg.Loc = getLoc(); 6199 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6200 Msg.IsSymbolic = true; 6201 lex(); // skip message name 6202 } else if (!parseExpr(Msg.Id, "a message name")) { 6203 return false; 6204 } 6205 6206 if (trySkipToken(AsmToken::Comma)) { 6207 Op.IsDefined = true; 
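    // An operation id follows the message id, e.g. (illustrative)
    // sendmsg(MSG_GS, GS_OP_EMIT, 0); the optional third field is the
    // stream id parsed below.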
6208 Op.Loc = getLoc(); 6209 if (isToken(AsmToken::Identifier) && 6210 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6211 lex(); // skip operation name 6212 } else if (!parseExpr(Op.Id, "an operation name")) { 6213 return false; 6214 } 6215 6216 if (trySkipToken(AsmToken::Comma)) { 6217 Stream.IsDefined = true; 6218 Stream.Loc = getLoc(); 6219 if (!parseExpr(Stream.Id)) 6220 return false; 6221 } 6222 } 6223 6224 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6225 } 6226 6227 bool 6228 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6229 const OperandInfoTy &Op, 6230 const OperandInfoTy &Stream) { 6231 using namespace llvm::AMDGPU::SendMsg; 6232 6233 // Validation strictness depends on whether message is specified 6234 // in a symbolc or in a numeric form. In the latter case 6235 // only encoding possibility is checked. 6236 bool Strict = Msg.IsSymbolic; 6237 6238 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6239 Error(Msg.Loc, "invalid message id"); 6240 return false; 6241 } 6242 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6243 if (Op.IsDefined) { 6244 Error(Op.Loc, "message does not support operations"); 6245 } else { 6246 Error(Msg.Loc, "missing message operation"); 6247 } 6248 return false; 6249 } 6250 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6251 Error(Op.Loc, "invalid operation id"); 6252 return false; 6253 } 6254 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6255 Error(Stream.Loc, "message operation does not support streams"); 6256 return false; 6257 } 6258 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6259 Error(Stream.Loc, "invalid message stream id"); 6260 return false; 6261 } 6262 return true; 6263 } 6264 6265 OperandMatchResultTy 6266 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6267 using namespace llvm::AMDGPU::SendMsg; 6268 6269 int64_t ImmVal = 0; 6270 SMLoc Loc = getLoc(); 6271 6272 if (trySkipId("sendmsg", AsmToken::LParen)) { 6273 OperandInfoTy Msg(ID_UNKNOWN_); 6274 OperandInfoTy Op(OP_NONE_); 6275 OperandInfoTy Stream(STREAM_ID_NONE_); 6276 if (parseSendMsgBody(Msg, Op, Stream) && 6277 validateSendMsg(Msg, Op, Stream)) { 6278 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6279 } else { 6280 return MatchOperand_ParseFail; 6281 } 6282 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6283 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6284 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6285 return MatchOperand_ParseFail; 6286 } 6287 } else { 6288 return MatchOperand_ParseFail; 6289 } 6290 6291 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6292 return MatchOperand_Success; 6293 } 6294 6295 bool AMDGPUOperand::isSendMsg() const { 6296 return isImmTy(ImmTySendMsg); 6297 } 6298 6299 //===----------------------------------------------------------------------===// 6300 // v_interp 6301 //===----------------------------------------------------------------------===// 6302 6303 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6304 StringRef Str; 6305 SMLoc S = getLoc(); 6306 6307 if (!parseId(Str)) 6308 return MatchOperand_NoMatch; 6309 6310 int Slot = StringSwitch<int>(Str) 6311 .Case("p10", 0) 6312 .Case("p20", 1) 6313 .Case("p0", 2) 6314 .Default(-1); 6315 6316 if (Slot == -1) { 6317 Error(S, "invalid interpolation slot"); 6318 return MatchOperand_ParseFail; 6319 } 6320 6321 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6322 
AMDGPUOperand::ImmTyInterpSlot)); 6323 return MatchOperand_Success; 6324 } 6325 6326 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6327 StringRef Str; 6328 SMLoc S = getLoc(); 6329 6330 if (!parseId(Str)) 6331 return MatchOperand_NoMatch; 6332 6333 if (!Str.startswith("attr")) { 6334 Error(S, "invalid interpolation attribute"); 6335 return MatchOperand_ParseFail; 6336 } 6337 6338 StringRef Chan = Str.take_back(2); 6339 int AttrChan = StringSwitch<int>(Chan) 6340 .Case(".x", 0) 6341 .Case(".y", 1) 6342 .Case(".z", 2) 6343 .Case(".w", 3) 6344 .Default(-1); 6345 if (AttrChan == -1) { 6346 Error(S, "invalid or missing interpolation attribute channel"); 6347 return MatchOperand_ParseFail; 6348 } 6349 6350 Str = Str.drop_back(2).drop_front(4); 6351 6352 uint8_t Attr; 6353 if (Str.getAsInteger(10, Attr)) { 6354 Error(S, "invalid or missing interpolation attribute number"); 6355 return MatchOperand_ParseFail; 6356 } 6357 6358 if (Attr > 63) { 6359 Error(S, "out of bounds interpolation attribute number"); 6360 return MatchOperand_ParseFail; 6361 } 6362 6363 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6364 6365 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6366 AMDGPUOperand::ImmTyInterpAttr)); 6367 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6368 AMDGPUOperand::ImmTyAttrChan)); 6369 return MatchOperand_Success; 6370 } 6371 6372 //===----------------------------------------------------------------------===// 6373 // exp 6374 //===----------------------------------------------------------------------===// 6375 6376 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6377 using namespace llvm::AMDGPU::Exp; 6378 6379 StringRef Str; 6380 SMLoc S = getLoc(); 6381 6382 if (!parseId(Str)) 6383 return MatchOperand_NoMatch; 6384 6385 unsigned Id = getTgtId(Str); 6386 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6387 Error(S, (Id == ET_INVALID) ? 
6388 "invalid exp target" : 6389 "exp target is not supported on this GPU"); 6390 return MatchOperand_ParseFail; 6391 } 6392 6393 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6394 AMDGPUOperand::ImmTyExpTgt)); 6395 return MatchOperand_Success; 6396 } 6397 6398 //===----------------------------------------------------------------------===// 6399 // parser helpers 6400 //===----------------------------------------------------------------------===// 6401 6402 bool 6403 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6404 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6405 } 6406 6407 bool 6408 AMDGPUAsmParser::isId(const StringRef Id) const { 6409 return isId(getToken(), Id); 6410 } 6411 6412 bool 6413 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6414 return getTokenKind() == Kind; 6415 } 6416 6417 bool 6418 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6419 if (isId(Id)) { 6420 lex(); 6421 return true; 6422 } 6423 return false; 6424 } 6425 6426 bool 6427 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6428 if (isToken(AsmToken::Identifier)) { 6429 StringRef Tok = getTokenStr(); 6430 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6431 lex(); 6432 return true; 6433 } 6434 } 6435 return false; 6436 } 6437 6438 bool 6439 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6440 if (isId(Id) && peekToken().is(Kind)) { 6441 lex(); 6442 lex(); 6443 return true; 6444 } 6445 return false; 6446 } 6447 6448 bool 6449 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6450 if (isToken(Kind)) { 6451 lex(); 6452 return true; 6453 } 6454 return false; 6455 } 6456 6457 bool 6458 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6459 const StringRef ErrMsg) { 6460 if (!trySkipToken(Kind)) { 6461 Error(getLoc(), ErrMsg); 6462 return false; 6463 } 6464 return true; 6465 } 6466 6467 bool 6468 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6469 SMLoc S = getLoc(); 6470 6471 const MCExpr *Expr; 6472 if (Parser.parseExpression(Expr)) 6473 return false; 6474 6475 if (Expr->evaluateAsAbsolute(Imm)) 6476 return true; 6477 6478 if (Expected.empty()) { 6479 Error(S, "expected absolute expression"); 6480 } else { 6481 Error(S, Twine("expected ", Expected) + 6482 Twine(" or an absolute expression")); 6483 } 6484 return false; 6485 } 6486 6487 bool 6488 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6489 SMLoc S = getLoc(); 6490 6491 const MCExpr *Expr; 6492 if (Parser.parseExpression(Expr)) 6493 return false; 6494 6495 int64_t IntVal; 6496 if (Expr->evaluateAsAbsolute(IntVal)) { 6497 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6498 } else { 6499 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6500 } 6501 return true; 6502 } 6503 6504 bool 6505 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6506 if (isToken(AsmToken::String)) { 6507 Val = getToken().getStringContents(); 6508 lex(); 6509 return true; 6510 } else { 6511 Error(getLoc(), ErrMsg); 6512 return false; 6513 } 6514 } 6515 6516 bool 6517 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6518 if (isToken(AsmToken::Identifier)) { 6519 Val = getTokenStr(); 6520 lex(); 6521 return true; 6522 } else { 6523 if (!ErrMsg.empty()) 6524 Error(getLoc(), ErrMsg); 6525 return false; 6526 } 6527 } 6528 6529 AsmToken 6530 AMDGPUAsmParser::getToken() const { 6531 return Parser.getTok(); 6532 } 6533 6534 AsmToken 6535 
AMDGPUAsmParser::peekToken() { 6536 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6537 } 6538 6539 void 6540 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6541 auto TokCount = getLexer().peekTokens(Tokens); 6542 6543 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6544 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6545 } 6546 6547 AsmToken::TokenKind 6548 AMDGPUAsmParser::getTokenKind() const { 6549 return getLexer().getKind(); 6550 } 6551 6552 SMLoc 6553 AMDGPUAsmParser::getLoc() const { 6554 return getToken().getLoc(); 6555 } 6556 6557 StringRef 6558 AMDGPUAsmParser::getTokenStr() const { 6559 return getToken().getString(); 6560 } 6561 6562 void 6563 AMDGPUAsmParser::lex() { 6564 Parser.Lex(); 6565 } 6566 6567 SMLoc 6568 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6569 const OperandVector &Operands) const { 6570 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6571 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6572 if (Test(Op)) 6573 return Op.getStartLoc(); 6574 } 6575 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6576 } 6577 6578 SMLoc 6579 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6580 const OperandVector &Operands) const { 6581 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6582 return getOperandLoc(Test, Operands); 6583 } 6584 6585 SMLoc 6586 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6587 const OperandVector &Operands) const { 6588 auto Test = [=](const AMDGPUOperand& Op) { 6589 return Op.isRegKind() && Op.getReg() == Reg; 6590 }; 6591 return getOperandLoc(Test, Operands); 6592 } 6593 6594 SMLoc 6595 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6596 auto Test = [](const AMDGPUOperand& Op) { 6597 return Op.IsImmKindLiteral() || Op.isExpr(); 6598 }; 6599 return getOperandLoc(Test, Operands); 6600 } 6601 6602 SMLoc 6603 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6604 auto Test = [](const AMDGPUOperand& Op) { 6605 return Op.isImmKindConst(); 6606 }; 6607 return getOperandLoc(Test, Operands); 6608 } 6609 6610 //===----------------------------------------------------------------------===// 6611 // swizzle 6612 //===----------------------------------------------------------------------===// 6613 6614 LLVM_READNONE 6615 static unsigned 6616 encodeBitmaskPerm(const unsigned AndMask, 6617 const unsigned OrMask, 6618 const unsigned XorMask) { 6619 using namespace llvm::AMDGPU::Swizzle; 6620 6621 return BITMASK_PERM_ENC | 6622 (AndMask << BITMASK_AND_SHIFT) | 6623 (OrMask << BITMASK_OR_SHIFT) | 6624 (XorMask << BITMASK_XOR_SHIFT); 6625 } 6626 6627 bool 6628 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6629 const unsigned MinVal, 6630 const unsigned MaxVal, 6631 const StringRef ErrMsg, 6632 SMLoc &Loc) { 6633 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6634 return false; 6635 } 6636 Loc = getLoc(); 6637 if (!parseExpr(Op)) { 6638 return false; 6639 } 6640 if (Op < MinVal || Op > MaxVal) { 6641 Error(Loc, ErrMsg); 6642 return false; 6643 } 6644 6645 return true; 6646 } 6647 6648 bool 6649 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6650 const unsigned MinVal, 6651 const unsigned MaxVal, 6652 const StringRef ErrMsg) { 6653 SMLoc Loc; 6654 for (unsigned i = 0; i < OpNum; ++i) { 6655 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6656 return false; 6657 } 6658 6659 return true; 6660 } 6661 6662 bool 6663 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6664 using namespace llvm::AMDGPU::Swizzle; 6665 6666 int64_t Lane[LANE_NUM]; 6667 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6668 "expected a 2-bit lane id")) { 6669 Imm = QUAD_PERM_ENC; 6670 for (unsigned I = 0; I < LANE_NUM; ++I) { 6671 Imm |= Lane[I] << (LANE_SHIFT * I); 6672 } 6673 return true; 6674 } 6675 return false; 6676 } 6677 6678 bool 6679 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6680 using namespace llvm::AMDGPU::Swizzle; 6681 6682 SMLoc Loc; 6683 int64_t GroupSize; 6684 int64_t LaneIdx; 6685 6686 if (!parseSwizzleOperand(GroupSize, 6687 2, 32, 6688 "group size must be in the interval [2,32]", 6689 Loc)) { 6690 return false; 6691 } 6692 if (!isPowerOf2_64(GroupSize)) { 6693 Error(Loc, "group size must be a power of two"); 6694 return false; 6695 } 6696 if (parseSwizzleOperand(LaneIdx, 6697 0, GroupSize - 1, 6698 "lane id must be in the interval [0,group size - 1]", 6699 Loc)) { 6700 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6701 return true; 6702 } 6703 return false; 6704 } 6705 6706 bool 6707 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6708 using namespace llvm::AMDGPU::Swizzle; 6709 6710 SMLoc Loc; 6711 int64_t GroupSize; 6712 6713 if (!parseSwizzleOperand(GroupSize, 6714 2, 32, 6715 "group size must be in the interval [2,32]", 6716 Loc)) { 6717 return false; 6718 } 6719 if (!isPowerOf2_64(GroupSize)) { 6720 Error(Loc, "group size must be a power of two"); 6721 return false; 6722 } 6723 6724 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6725 return true; 6726 } 6727 6728 bool 6729 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6730 using namespace llvm::AMDGPU::Swizzle; 6731 6732 SMLoc Loc; 6733 int64_t GroupSize; 6734 6735 if (!parseSwizzleOperand(GroupSize, 6736 1, 16, 6737 "group size must be in the interval [1,16]", 6738 Loc)) { 6739 return false; 6740 } 6741 if (!isPowerOf2_64(GroupSize)) { 6742 Error(Loc, "group size must be a power of two"); 6743 return false; 6744 } 6745 6746 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6747 return true; 6748 } 6749 6750 bool 6751 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6752 using namespace llvm::AMDGPU::Swizzle; 6753 6754 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6755 return false; 6756 } 6757 6758 StringRef Ctl; 6759 SMLoc StrLoc = getLoc(); 6760 if (!parseString(Ctl)) { 6761 return false; 6762 } 6763 if (Ctl.size() != BITMASK_WIDTH) { 6764 Error(StrLoc, "expected a 5-character mask"); 6765 return false; 6766 } 6767 6768 unsigned AndMask = 0; 6769 unsigned OrMask = 0; 6770 unsigned XorMask = 0; 6771 6772 for (size_t i = 0; i < Ctl.size(); ++i) { 6773 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6774 switch(Ctl[i]) { 6775 default: 6776 Error(StrLoc, "invalid mask"); 6777 return false; 6778 case '0': 6779 break; 6780 case '1': 6781 OrMask |= Mask; 6782 break; 6783 case 'p': 6784 AndMask |= Mask; 6785 break; 6786 case 'i': 6787 AndMask |= Mask; 6788 XorMask |= Mask; 6789 break; 6790 } 6791 } 6792 6793 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6794 return true; 6795 } 6796 6797 bool 6798 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6799 6800 SMLoc OffsetLoc = getLoc(); 6801 6802 if (!parseExpr(Imm, "a swizzle macro")) { 6803 return false; 6804 } 6805 if (!isUInt<16>(Imm)) { 6806 Error(OffsetLoc, "expected a 16-bit offset"); 6807 return false; 6808 } 6809 return true; 6810 } 6811 6812 bool 6813 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6814 using namespace llvm::AMDGPU::Swizzle; 6815 6816 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6817 6818 SMLoc ModeLoc = getLoc(); 6819 bool Ok = false; 6820 6821 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6822 Ok = parseSwizzleQuadPerm(Imm); 6823 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6824 Ok = parseSwizzleBitmaskPerm(Imm); 6825 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6826 Ok = parseSwizzleBroadcast(Imm); 6827 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6828 Ok = parseSwizzleSwap(Imm); 6829 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6830 Ok = parseSwizzleReverse(Imm); 6831 } else { 6832 Error(ModeLoc, "expected a swizzle mode"); 6833 } 6834 6835 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6836 } 6837 6838 return false; 6839 } 6840 6841 OperandMatchResultTy 6842 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6843 SMLoc S = getLoc(); 6844 int64_t Imm = 0; 6845 6846 if (trySkipId("offset")) { 6847 6848 bool Ok = false; 6849 if (skipToken(AsmToken::Colon, "expected a colon")) { 6850 if (trySkipId("swizzle")) { 6851 Ok = parseSwizzleMacro(Imm); 6852 } else { 6853 Ok = parseSwizzleOffset(Imm); 6854 } 6855 } 6856 6857 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6858 6859 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6860 } else { 6861 // Swizzle "offset" operand is optional. 6862 // If it is omitted, try parsing other optional operands. 6863 return parseOptionalOpr(Operands); 6864 } 6865 } 6866 6867 bool 6868 AMDGPUOperand::isSwizzle() const { 6869 return isImmTy(ImmTySwizzle); 6870 } 6871 6872 //===----------------------------------------------------------------------===// 6873 // VGPR Index Mode 6874 //===----------------------------------------------------------------------===// 6875 6876 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6877 6878 using namespace llvm::AMDGPU::VGPRIndexMode; 6879 6880 if (trySkipToken(AsmToken::RParen)) { 6881 return OFF; 6882 } 6883 6884 int64_t Imm = 0; 6885 6886 while (true) { 6887 unsigned Mode = 0; 6888 SMLoc S = getLoc(); 6889 6890 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6891 if (trySkipId(IdSymbolic[ModeId])) { 6892 Mode = 1 << ModeId; 6893 break; 6894 } 6895 } 6896 6897 if (Mode == 0) { 6898 Error(S, (Imm == 0)? 
6899 "expected a VGPR index mode or a closing parenthesis" : 6900 "expected a VGPR index mode"); 6901 return UNDEF; 6902 } 6903 6904 if (Imm & Mode) { 6905 Error(S, "duplicate VGPR index mode"); 6906 return UNDEF; 6907 } 6908 Imm |= Mode; 6909 6910 if (trySkipToken(AsmToken::RParen)) 6911 break; 6912 if (!skipToken(AsmToken::Comma, 6913 "expected a comma or a closing parenthesis")) 6914 return UNDEF; 6915 } 6916 6917 return Imm; 6918 } 6919 6920 OperandMatchResultTy 6921 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6922 6923 using namespace llvm::AMDGPU::VGPRIndexMode; 6924 6925 int64_t Imm = 0; 6926 SMLoc S = getLoc(); 6927 6928 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6929 Imm = parseGPRIdxMacro(); 6930 if (Imm == UNDEF) 6931 return MatchOperand_ParseFail; 6932 } else { 6933 if (getParser().parseAbsoluteExpression(Imm)) 6934 return MatchOperand_ParseFail; 6935 if (Imm < 0 || !isUInt<4>(Imm)) { 6936 Error(S, "invalid immediate: only 4-bit values are legal"); 6937 return MatchOperand_ParseFail; 6938 } 6939 } 6940 6941 Operands.push_back( 6942 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6943 return MatchOperand_Success; 6944 } 6945 6946 bool AMDGPUOperand::isGPRIdxMode() const { 6947 return isImmTy(ImmTyGprIdxMode); 6948 } 6949 6950 //===----------------------------------------------------------------------===// 6951 // sopp branch targets 6952 //===----------------------------------------------------------------------===// 6953 6954 OperandMatchResultTy 6955 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6956 6957 // Make sure we are not parsing something 6958 // that looks like a label or an expression but is not. 6959 // This will improve error messages. 6960 if (isRegister() || isModifier()) 6961 return MatchOperand_NoMatch; 6962 6963 if (!parseExpr(Operands)) 6964 return MatchOperand_ParseFail; 6965 6966 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6967 assert(Opr.isImm() || Opr.isExpr()); 6968 SMLoc Loc = Opr.getStartLoc(); 6969 6970 // Currently we do not support arbitrary expressions as branch targets. 6971 // Only labels and absolute expressions are accepted. 
6972 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6973 Error(Loc, "expected an absolute expression or a label"); 6974 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6975 Error(Loc, "expected a 16-bit signed jump offset"); 6976 } 6977 6978 return MatchOperand_Success; 6979 } 6980 6981 //===----------------------------------------------------------------------===// 6982 // Boolean holding registers 6983 //===----------------------------------------------------------------------===// 6984 6985 OperandMatchResultTy 6986 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6987 return parseReg(Operands); 6988 } 6989 6990 //===----------------------------------------------------------------------===// 6991 // mubuf 6992 //===----------------------------------------------------------------------===// 6993 6994 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6995 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6996 } 6997 6998 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6999 const OperandVector &Operands, 7000 bool IsAtomic, 7001 bool IsLds) { 7002 bool IsLdsOpcode = IsLds; 7003 bool HasLdsModifier = false; 7004 OptionalImmIndexMap OptionalIdx; 7005 unsigned FirstOperandIdx = 1; 7006 bool IsAtomicReturn = false; 7007 7008 if (IsAtomic) { 7009 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7010 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7011 if (!Op.isCPol()) 7012 continue; 7013 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7014 break; 7015 } 7016 7017 if (!IsAtomicReturn) { 7018 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7019 if (NewOpc != -1) 7020 Inst.setOpcode(NewOpc); 7021 } 7022 7023 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7024 SIInstrFlags::IsAtomicRet; 7025 } 7026 7027 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7028 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7029 7030 // Add the register arguments 7031 if (Op.isReg()) { 7032 Op.addRegOperands(Inst, 1); 7033 // Insert a tied src for atomic return dst. 7034 // This cannot be postponed as subsequent calls to 7035 // addImmOperands rely on correct number of MC operands. 7036 if (IsAtomicReturn && i == FirstOperandIdx) 7037 Op.addRegOperands(Inst, 1); 7038 continue; 7039 } 7040 7041 // Handle the case where soffset is an immediate 7042 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7043 Op.addImmOperands(Inst, 1); 7044 continue; 7045 } 7046 7047 HasLdsModifier |= Op.isLDS(); 7048 7049 // Handle tokens like 'offen' which are sometimes hard-coded into the 7050 // asm string. There are no MCInst operands for these. 7051 if (Op.isToken()) { 7052 continue; 7053 } 7054 assert(Op.isImm()); 7055 7056 // Handle optional arguments 7057 OptionalIdx[Op.getImmTy()] = i; 7058 } 7059 7060 // This is a workaround for an llvm quirk which may result in an 7061 // incorrect instruction selection. Lds and non-lds versions of 7062 // MUBUF instructions are identical except that lds versions 7063 // have mandatory 'lds' modifier. However this modifier follows 7064 // optional modifiers and llvm asm matcher regards this 'lds' 7065 // modifier as an optional one. As a result, an lds version 7066 // of opcode may be selected even if it has no 'lds' modifier. 7067 if (IsLdsOpcode && !HasLdsModifier) { 7068 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7069 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7070 Inst.setOpcode(NoLdsOpcode); 7071 IsLdsOpcode = false; 7072 } 7073 } 7074 7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7077 7078 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7080 } 7081 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7082 } 7083 7084 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7085 OptionalImmIndexMap OptionalIdx; 7086 7087 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7088 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7089 7090 // Add the register arguments 7091 if (Op.isReg()) { 7092 Op.addRegOperands(Inst, 1); 7093 continue; 7094 } 7095 7096 // Handle the case where soffset is an immediate 7097 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7098 Op.addImmOperands(Inst, 1); 7099 continue; 7100 } 7101 7102 // Handle tokens like 'offen' which are sometimes hard-coded into the 7103 // asm string. There are no MCInst operands for these. 7104 if (Op.isToken()) { 7105 continue; 7106 } 7107 assert(Op.isImm()); 7108 7109 // Handle optional arguments 7110 OptionalIdx[Op.getImmTy()] = i; 7111 } 7112 7113 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7114 AMDGPUOperand::ImmTyOffset); 7115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7116 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7119 } 7120 7121 //===----------------------------------------------------------------------===// 7122 // mimg 7123 //===----------------------------------------------------------------------===// 7124 7125 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7126 bool IsAtomic) { 7127 unsigned I = 1; 7128 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7129 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7130 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7131 } 7132 7133 if (IsAtomic) { 7134 // Add src, same as dst 7135 assert(Desc.getNumDefs() == 1); 7136 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7137 } 7138 7139 OptionalImmIndexMap OptionalIdx; 7140 7141 for (unsigned E = Operands.size(); I != E; ++I) { 7142 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7143 7144 // Add the register arguments 7145 if (Op.isReg()) { 7146 Op.addRegOperands(Inst, 1); 7147 } else if (Op.isImmModifier()) { 7148 OptionalIdx[Op.getImmTy()] = I; 7149 } else if (!Op.isToken()) { 7150 llvm_unreachable("unexpected operand type"); 7151 } 7152 } 7153 7154 bool IsGFX10Plus = isGFX10Plus(); 7155 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7157 if (IsGFX10Plus) 7158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7161 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7162 if (IsGFX10Plus) 7163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7164 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
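    // Not all opcodes define a tfe operand, so only add it when the
    // MCInstrDesc actually has one.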
7165 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7166 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7167 if (!IsGFX10Plus) 7168 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7169 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7170 } 7171 7172 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7173 cvtMIMG(Inst, Operands, true); 7174 } 7175 7176 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7177 OptionalImmIndexMap OptionalIdx; 7178 bool IsAtomicReturn = false; 7179 7180 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7181 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7182 if (!Op.isCPol()) 7183 continue; 7184 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7185 break; 7186 } 7187 7188 if (!IsAtomicReturn) { 7189 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7190 if (NewOpc != -1) 7191 Inst.setOpcode(NewOpc); 7192 } 7193 7194 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7195 SIInstrFlags::IsAtomicRet; 7196 7197 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7198 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7199 7200 // Add the register arguments 7201 if (Op.isReg()) { 7202 Op.addRegOperands(Inst, 1); 7203 if (IsAtomicReturn && i == 1) 7204 Op.addRegOperands(Inst, 1); 7205 continue; 7206 } 7207 7208 // Handle the case where soffset is an immediate 7209 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7210 Op.addImmOperands(Inst, 1); 7211 continue; 7212 } 7213 7214 // Handle tokens like 'offen' which are sometimes hard-coded into the 7215 // asm string. There are no MCInst operands for these. 7216 if (Op.isToken()) { 7217 continue; 7218 } 7219 assert(Op.isImm()); 7220 7221 // Handle optional arguments 7222 OptionalIdx[Op.getImmTy()] = i; 7223 } 7224 7225 if ((int)Inst.getNumOperands() <= 7226 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7227 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7228 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7229 } 7230 7231 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7232 const OperandVector &Operands) { 7233 for (unsigned I = 1; I < Operands.size(); ++I) { 7234 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7235 if (Operand.isReg()) 7236 Operand.addRegOperands(Inst, 1); 7237 } 7238 7239 Inst.addOperand(MCOperand::createImm(1)); // a16 7240 } 7241 7242 //===----------------------------------------------------------------------===// 7243 // smrd 7244 //===----------------------------------------------------------------------===// 7245 7246 bool AMDGPUOperand::isSMRDOffset8() const { 7247 return isImm() && isUInt<8>(getImm()); 7248 } 7249 7250 bool AMDGPUOperand::isSMEMOffset() const { 7251 return isImm(); // Offset range is checked later by validator. 7252 } 7253 7254 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7255 // 32-bit literals are only supported on CI and we only want to use them 7256 // when the offset is > 8-bits. 
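  // For example, an offset of 0x100 does not fit in 8 bits and needs the
  // 32-bit literal form, while 0xff can still use the 8-bit offset above.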
7257 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7258 } 7259 7260 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7261 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7262 } 7263 7264 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7265 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7266 } 7267 7268 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7269 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7270 } 7271 7272 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7273 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7274 } 7275 7276 //===----------------------------------------------------------------------===// 7277 // vop3 7278 //===----------------------------------------------------------------------===// 7279 7280 static bool ConvertOmodMul(int64_t &Mul) { 7281 if (Mul != 1 && Mul != 2 && Mul != 4) 7282 return false; 7283 7284 Mul >>= 1; 7285 return true; 7286 } 7287 7288 static bool ConvertOmodDiv(int64_t &Div) { 7289 if (Div == 1) { 7290 Div = 0; 7291 return true; 7292 } 7293 7294 if (Div == 2) { 7295 Div = 3; 7296 return true; 7297 } 7298 7299 return false; 7300 } 7301 7302 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7303 // This is intentional and ensures compatibility with sp3. 7304 // See bug 35397 for details. 7305 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7306 if (BoundCtrl == 0 || BoundCtrl == 1) { 7307 BoundCtrl = 1; 7308 return true; 7309 } 7310 return false; 7311 } 7312 7313 // Note: the order in this table matches the order of operands in AsmString. 7314 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7315 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7316 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7317 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7318 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7319 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7320 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7321 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7322 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7323 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7324 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7325 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7326 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7327 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7328 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7329 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7330 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7331 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7332 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7333 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7334 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7335 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7336 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7337 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7338 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7339 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7340 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7341 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7342 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7343 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7344 {"src0_sel", 
   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3Or4(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomic which have
  // hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
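  //
  // Illustrative example (hypothetical syntax, not from the original source):
  // in "flat_atomic_swap v0, v[1:2], v3 offset:16 glc" the trailing "glc" is
  // hard-coded in the AsmString while "offset" is optional, so the lookahead
  // below is what lets the parser reach "glc".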
7384 7385 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7386 if (res != MatchOperand_Success || 7387 isToken(AsmToken::EndOfStatement)) 7388 break; 7389 7390 trySkipToken(AsmToken::Comma); 7391 res = parseOptionalOpr(Operands); 7392 } 7393 7394 return res; 7395 } 7396 7397 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7398 OperandMatchResultTy res; 7399 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7400 // try to parse any optional operand here 7401 if (Op.IsBit) { 7402 res = parseNamedBit(Op.Name, Operands, Op.Type); 7403 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7404 res = parseOModOperand(Operands); 7405 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7406 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7407 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7408 res = parseSDWASel(Operands, Op.Name, Op.Type); 7409 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7410 res = parseSDWADstUnused(Operands); 7411 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7412 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7413 Op.Type == AMDGPUOperand::ImmTyNegLo || 7414 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7415 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7416 Op.ConvertResult); 7417 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7418 res = parseDim(Operands); 7419 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7420 res = parseCPol(Operands); 7421 } else { 7422 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7423 } 7424 if (res != MatchOperand_NoMatch) { 7425 return res; 7426 } 7427 } 7428 return MatchOperand_NoMatch; 7429 } 7430 7431 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7432 StringRef Name = getTokenStr(); 7433 if (Name == "mul") { 7434 return parseIntWithPrefix("mul", Operands, 7435 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7436 } 7437 7438 if (Name == "div") { 7439 return parseIntWithPrefix("div", Operands, 7440 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7441 } 7442 7443 return MatchOperand_NoMatch; 7444 } 7445 7446 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7447 cvtVOP3P(Inst, Operands); 7448 7449 int Opc = Inst.getOpcode(); 7450 7451 int SrcNum; 7452 const int Ops[] = { AMDGPU::OpName::src0, 7453 AMDGPU::OpName::src1, 7454 AMDGPU::OpName::src2 }; 7455 for (SrcNum = 0; 7456 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7457 ++SrcNum); 7458 assert(SrcNum > 0); 7459 7460 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7461 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7462 7463 if ((OpSel & (1 << SrcNum)) != 0) { 7464 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7465 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7466 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7467 } 7468 } 7469 7470 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7471 // 1. This operand is input modifiers 7472 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7473 // 2. This is not last operand 7474 && Desc.NumOperands > (OpNum + 1) 7475 // 3. Next operand is register class 7476 && Desc.OpInfo[OpNum + 1].RegClass != -1 7477 // 4. 
Next register is not tied to any other operand 7478 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7479 } 7480 7481 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7482 { 7483 OptionalImmIndexMap OptionalIdx; 7484 unsigned Opc = Inst.getOpcode(); 7485 7486 unsigned I = 1; 7487 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7488 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7489 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7490 } 7491 7492 for (unsigned E = Operands.size(); I != E; ++I) { 7493 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7494 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7495 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7496 } else if (Op.isInterpSlot() || 7497 Op.isInterpAttr() || 7498 Op.isAttrChan()) { 7499 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7500 } else if (Op.isImmModifier()) { 7501 OptionalIdx[Op.getImmTy()] = I; 7502 } else { 7503 llvm_unreachable("unhandled operand type"); 7504 } 7505 } 7506 7507 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7509 } 7510 7511 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7513 } 7514 7515 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7517 } 7518 } 7519 7520 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7521 OptionalImmIndexMap &OptionalIdx) { 7522 unsigned Opc = Inst.getOpcode(); 7523 7524 unsigned I = 1; 7525 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7526 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7527 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7528 } 7529 7530 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7531 // This instruction has src modifiers 7532 for (unsigned E = Operands.size(); I != E; ++I) { 7533 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7534 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7535 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7536 } else if (Op.isImmModifier()) { 7537 OptionalIdx[Op.getImmTy()] = I; 7538 } else if (Op.isRegOrImm()) { 7539 Op.addRegOrImmOperands(Inst, 1); 7540 } else { 7541 llvm_unreachable("unhandled operand type"); 7542 } 7543 } 7544 } else { 7545 // No src modifiers 7546 for (unsigned E = Operands.size(); I != E; ++I) { 7547 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7548 if (Op.isMod()) { 7549 OptionalIdx[Op.getImmTy()] = I; 7550 } else { 7551 Op.addRegOrImmOperands(Inst, 1); 7552 } 7553 } 7554 } 7555 7556 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7557 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7558 } 7559 7560 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7561 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7562 } 7563 7564 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7565 // it has src2 register operand that is tied to dst operand 7566 // we don't allow modifiers for this operand in assembler so src2_modifiers 7567 // should be 0. 
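  //
  // Illustrative example (not from the original source): for
  // "v_mac_f32_e64 v0, v1, v2" the tied src2 is the dst register v0, so a
  // zero src2_modifiers operand is synthesized and operand 0 (the dst) is
  // copied into the src2 slot.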
7568 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7569 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7570 Opc == AMDGPU::V_MAC_F32_e64_vi || 7571 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7572 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7573 Opc == AMDGPU::V_MAC_F16_e64_vi || 7574 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7575 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7576 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7577 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7578 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7579 auto it = Inst.begin(); 7580 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7581 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7582 ++it; 7583 // Copy the operand to ensure it's not invalidated when Inst grows. 7584 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7585 } 7586 } 7587 7588 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7589 OptionalImmIndexMap OptionalIdx; 7590 cvtVOP3(Inst, Operands, OptionalIdx); 7591 } 7592 7593 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7594 OptionalImmIndexMap &OptIdx) { 7595 const int Opc = Inst.getOpcode(); 7596 const MCInstrDesc &Desc = MII.get(Opc); 7597 7598 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7599 7600 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7601 assert(!IsPacked); 7602 Inst.addOperand(Inst.getOperand(0)); 7603 } 7604 7605 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7606 // instruction, and then figure out where to actually put the modifiers 7607 7608 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7609 if (OpSelIdx != -1) { 7610 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7611 } 7612 7613 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7614 if (OpSelHiIdx != -1) { 7615 int DefaultVal = IsPacked ? 
-1 : 0; 7616 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7617 DefaultVal); 7618 } 7619 7620 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7621 if (NegLoIdx != -1) { 7622 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7623 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7624 } 7625 7626 const int Ops[] = { AMDGPU::OpName::src0, 7627 AMDGPU::OpName::src1, 7628 AMDGPU::OpName::src2 }; 7629 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7630 AMDGPU::OpName::src1_modifiers, 7631 AMDGPU::OpName::src2_modifiers }; 7632 7633 unsigned OpSel = 0; 7634 unsigned OpSelHi = 0; 7635 unsigned NegLo = 0; 7636 unsigned NegHi = 0; 7637 7638 if (OpSelIdx != -1) 7639 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7640 7641 if (OpSelHiIdx != -1) 7642 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7643 7644 if (NegLoIdx != -1) { 7645 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7646 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7647 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7648 } 7649 7650 for (int J = 0; J < 3; ++J) { 7651 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7652 if (OpIdx == -1) 7653 break; 7654 7655 uint32_t ModVal = 0; 7656 7657 if ((OpSel & (1 << J)) != 0) 7658 ModVal |= SISrcMods::OP_SEL_0; 7659 7660 if ((OpSelHi & (1 << J)) != 0) 7661 ModVal |= SISrcMods::OP_SEL_1; 7662 7663 if ((NegLo & (1 << J)) != 0) 7664 ModVal |= SISrcMods::NEG; 7665 7666 if ((NegHi & (1 << J)) != 0) 7667 ModVal |= SISrcMods::NEG_HI; 7668 7669 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7670 7671 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7672 } 7673 } 7674 7675 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7676 OptionalImmIndexMap OptIdx; 7677 cvtVOP3(Inst, Operands, OptIdx); 7678 cvtVOP3P(Inst, Operands, OptIdx); 7679 } 7680 7681 //===----------------------------------------------------------------------===// 7682 // dpp 7683 //===----------------------------------------------------------------------===// 7684 7685 bool AMDGPUOperand::isDPP8() const { 7686 return isImmTy(ImmTyDPP8); 7687 } 7688 7689 bool AMDGPUOperand::isDPPCtrl() const { 7690 using namespace AMDGPU::DPP; 7691 7692 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7693 if (result) { 7694 int64_t Imm = getImm(); 7695 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7696 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7697 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7698 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7699 (Imm == DppCtrl::WAVE_SHL1) || 7700 (Imm == DppCtrl::WAVE_ROL1) || 7701 (Imm == DppCtrl::WAVE_SHR1) || 7702 (Imm == DppCtrl::WAVE_ROR1) || 7703 (Imm == DppCtrl::ROW_MIRROR) || 7704 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7705 (Imm == DppCtrl::BCAST15) || 7706 (Imm == DppCtrl::BCAST31) || 7707 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7708 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7709 } 7710 return false; 7711 } 7712 7713 //===----------------------------------------------------------------------===// 7714 // mAI 7715 //===----------------------------------------------------------------------===// 7716 7717 bool AMDGPUOperand::isBLGP() const { 7718 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7719 } 7720 7721 bool 
AMDGPUOperand::isCBSZ() const { 7722 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7723 } 7724 7725 bool AMDGPUOperand::isABID() const { 7726 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7727 } 7728 7729 bool AMDGPUOperand::isS16Imm() const { 7730 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7731 } 7732 7733 bool AMDGPUOperand::isU16Imm() const { 7734 return isImm() && isUInt<16>(getImm()); 7735 } 7736 7737 //===----------------------------------------------------------------------===// 7738 // dim 7739 //===----------------------------------------------------------------------===// 7740 7741 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7742 // We want to allow "dim:1D" etc., 7743 // but the initial 1 is tokenized as an integer. 7744 std::string Token; 7745 if (isToken(AsmToken::Integer)) { 7746 SMLoc Loc = getToken().getEndLoc(); 7747 Token = std::string(getTokenStr()); 7748 lex(); 7749 if (getLoc() != Loc) 7750 return false; 7751 } 7752 7753 StringRef Suffix; 7754 if (!parseId(Suffix)) 7755 return false; 7756 Token += Suffix; 7757 7758 StringRef DimId = Token; 7759 if (DimId.startswith("SQ_RSRC_IMG_")) 7760 DimId = DimId.drop_front(12); 7761 7762 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7763 if (!DimInfo) 7764 return false; 7765 7766 Encoding = DimInfo->Encoding; 7767 return true; 7768 } 7769 7770 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7771 if (!isGFX10Plus()) 7772 return MatchOperand_NoMatch; 7773 7774 SMLoc S = getLoc(); 7775 7776 if (!trySkipId("dim", AsmToken::Colon)) 7777 return MatchOperand_NoMatch; 7778 7779 unsigned Encoding; 7780 SMLoc Loc = getLoc(); 7781 if (!parseDimId(Encoding)) { 7782 Error(Loc, "invalid dim value"); 7783 return MatchOperand_ParseFail; 7784 } 7785 7786 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7787 AMDGPUOperand::ImmTyDim)); 7788 return MatchOperand_Success; 7789 } 7790 7791 //===----------------------------------------------------------------------===// 7792 // dpp 7793 //===----------------------------------------------------------------------===// 7794 7795 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7796 SMLoc S = getLoc(); 7797 7798 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7799 return MatchOperand_NoMatch; 7800 7801 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7802 7803 int64_t Sels[8]; 7804 7805 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7806 return MatchOperand_ParseFail; 7807 7808 for (size_t i = 0; i < 8; ++i) { 7809 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7810 return MatchOperand_ParseFail; 7811 7812 SMLoc Loc = getLoc(); 7813 if (getParser().parseAbsoluteExpression(Sels[i])) 7814 return MatchOperand_ParseFail; 7815 if (0 > Sels[i] || 7 < Sels[i]) { 7816 Error(Loc, "expected a 3-bit value"); 7817 return MatchOperand_ParseFail; 7818 } 7819 } 7820 7821 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7822 return MatchOperand_ParseFail; 7823 7824 unsigned DPP8 = 0; 7825 for (size_t i = 0; i < 8; ++i) 7826 DPP8 |= (Sels[i] << (i * 3)); 7827 7828 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7829 return MatchOperand_Success; 7830 } 7831 7832 bool 7833 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7834 const OperandVector &Operands) { 7835 if (Ctrl == "row_newbcast") 7836 return isGFX90A(); 7837 7838 if (Ctrl == "row_share" || 7839 Ctrl 
== "row_xmask") 7840 return isGFX10Plus(); 7841 7842 if (Ctrl == "wave_shl" || 7843 Ctrl == "wave_shr" || 7844 Ctrl == "wave_rol" || 7845 Ctrl == "wave_ror" || 7846 Ctrl == "row_bcast") 7847 return isVI() || isGFX9(); 7848 7849 return Ctrl == "row_mirror" || 7850 Ctrl == "row_half_mirror" || 7851 Ctrl == "quad_perm" || 7852 Ctrl == "row_shl" || 7853 Ctrl == "row_shr" || 7854 Ctrl == "row_ror"; 7855 } 7856 7857 int64_t 7858 AMDGPUAsmParser::parseDPPCtrlPerm() { 7859 // quad_perm:[%d,%d,%d,%d] 7860 7861 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7862 return -1; 7863 7864 int64_t Val = 0; 7865 for (int i = 0; i < 4; ++i) { 7866 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7867 return -1; 7868 7869 int64_t Temp; 7870 SMLoc Loc = getLoc(); 7871 if (getParser().parseAbsoluteExpression(Temp)) 7872 return -1; 7873 if (Temp < 0 || Temp > 3) { 7874 Error(Loc, "expected a 2-bit value"); 7875 return -1; 7876 } 7877 7878 Val += (Temp << i * 2); 7879 } 7880 7881 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7882 return -1; 7883 7884 return Val; 7885 } 7886 7887 int64_t 7888 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7889 using namespace AMDGPU::DPP; 7890 7891 // sel:%d 7892 7893 int64_t Val; 7894 SMLoc Loc = getLoc(); 7895 7896 if (getParser().parseAbsoluteExpression(Val)) 7897 return -1; 7898 7899 struct DppCtrlCheck { 7900 int64_t Ctrl; 7901 int Lo; 7902 int Hi; 7903 }; 7904 7905 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7906 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7907 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7908 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7909 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7910 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7911 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7912 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7913 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7914 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7915 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7916 .Default({-1, 0, 0}); 7917 7918 bool Valid; 7919 if (Check.Ctrl == -1) { 7920 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7921 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7922 } else { 7923 Valid = Check.Lo <= Val && Val <= Check.Hi; 7924 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7925 } 7926 7927 if (!Valid) { 7928 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7929 return -1; 7930 } 7931 7932 return Val; 7933 } 7934 7935 OperandMatchResultTy 7936 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7937 using namespace AMDGPU::DPP; 7938 7939 if (!isToken(AsmToken::Identifier) || 7940 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7941 return MatchOperand_NoMatch; 7942 7943 SMLoc S = getLoc(); 7944 int64_t Val = -1; 7945 StringRef Ctrl; 7946 7947 parseId(Ctrl); 7948 7949 if (Ctrl == "row_mirror") { 7950 Val = DppCtrl::ROW_MIRROR; 7951 } else if (Ctrl == "row_half_mirror") { 7952 Val = DppCtrl::ROW_HALF_MIRROR; 7953 } else { 7954 if (skipToken(AsmToken::Colon, "expected a colon")) { 7955 if (Ctrl == "quad_perm") { 7956 Val = parseDPPCtrlPerm(); 7957 } else { 7958 Val = parseDPPCtrlSel(Ctrl); 7959 } 7960 } 7961 } 7962 7963 if (Val == -1) 7964 return MatchOperand_ParseFail; 7965 7966 Operands.push_back( 7967 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7968 return MatchOperand_Success; 7969 } 7970 7971 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7972 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7973 } 7974 7975 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7976 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7977 } 7978 7979 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7980 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7981 } 7982 7983 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7984 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7985 } 7986 7987 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7988 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7989 } 7990 7991 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7992 OptionalImmIndexMap OptionalIdx; 7993 7994 unsigned Opc = Inst.getOpcode(); 7995 bool HasModifiers = 7996 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 7997 unsigned I = 1; 7998 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7999 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8000 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8001 } 8002 8003 int Fi = 0; 8004 for (unsigned E = Operands.size(); I != E; ++I) { 8005 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8006 MCOI::TIED_TO); 8007 if (TiedTo != -1) { 8008 assert((unsigned)TiedTo < Inst.getNumOperands()); 8009 // handle tied old or src2 for MAC instructions 8010 Inst.addOperand(Inst.getOperand(TiedTo)); 8011 } 8012 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8013 // Add the register arguments 8014 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8015 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8016 // Skip it. 
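      // Illustrative (not from the original source): for VOP2b forms such as
      // "v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] ..." the "vcc"
      // token has no separate MCInst operand, so it is skipped rather than
      // added.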
8017 continue; 8018 } 8019 8020 if (IsDPP8) { 8021 if (Op.isDPP8()) { 8022 Op.addImmOperands(Inst, 1); 8023 } else if (HasModifiers && 8024 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8025 Op.addRegWithFPInputModsOperands(Inst, 2); 8026 } else if (Op.isFI()) { 8027 Fi = Op.getImm(); 8028 } else if (Op.isReg()) { 8029 Op.addRegOperands(Inst, 1); 8030 } else { 8031 llvm_unreachable("Invalid operand type"); 8032 } 8033 } else { 8034 if (HasModifiers && 8035 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8036 Op.addRegWithFPInputModsOperands(Inst, 2); 8037 } else if (Op.isReg()) { 8038 Op.addRegOperands(Inst, 1); 8039 } else if (Op.isDPPCtrl()) { 8040 Op.addImmOperands(Inst, 1); 8041 } else if (Op.isImm()) { 8042 // Handle optional arguments 8043 OptionalIdx[Op.getImmTy()] = I; 8044 } else { 8045 llvm_unreachable("Invalid operand type"); 8046 } 8047 } 8048 } 8049 8050 if (IsDPP8) { 8051 using namespace llvm::AMDGPU::DPP; 8052 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8053 } else { 8054 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8057 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8058 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8059 } 8060 } 8061 } 8062 8063 //===----------------------------------------------------------------------===// 8064 // sdwa 8065 //===----------------------------------------------------------------------===// 8066 8067 OperandMatchResultTy 8068 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8069 AMDGPUOperand::ImmTy Type) { 8070 using namespace llvm::AMDGPU::SDWA; 8071 8072 SMLoc S = getLoc(); 8073 StringRef Value; 8074 OperandMatchResultTy res; 8075 8076 SMLoc StringLoc; 8077 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8078 if (res != MatchOperand_Success) { 8079 return res; 8080 } 8081 8082 int64_t Int; 8083 Int = StringSwitch<int64_t>(Value) 8084 .Case("BYTE_0", SdwaSel::BYTE_0) 8085 .Case("BYTE_1", SdwaSel::BYTE_1) 8086 .Case("BYTE_2", SdwaSel::BYTE_2) 8087 .Case("BYTE_3", SdwaSel::BYTE_3) 8088 .Case("WORD_0", SdwaSel::WORD_0) 8089 .Case("WORD_1", SdwaSel::WORD_1) 8090 .Case("DWORD", SdwaSel::DWORD) 8091 .Default(0xffffffff); 8092 8093 if (Int == 0xffffffff) { 8094 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8095 return MatchOperand_ParseFail; 8096 } 8097 8098 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8099 return MatchOperand_Success; 8100 } 8101 8102 OperandMatchResultTy 8103 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8104 using namespace llvm::AMDGPU::SDWA; 8105 8106 SMLoc S = getLoc(); 8107 StringRef Value; 8108 OperandMatchResultTy res; 8109 8110 SMLoc StringLoc; 8111 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8112 if (res != MatchOperand_Success) { 8113 return res; 8114 } 8115 8116 int64_t Int; 8117 Int = StringSwitch<int64_t>(Value) 8118 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8119 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8120 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8121 .Default(0xffffffff); 8122 8123 if (Int == 0xffffffff) { 8124 Error(StringLoc, "invalid dst_unused value"); 8125 return MatchOperand_ParseFail; 8126 } 8127 8128 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
                                              AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) SDWA forms use the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
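      //
      // Operand-count sketch (derived from the checks below): when the dst
      // "vcc" is reached the MCInst holds only vdst (1 operand); when the
      // carry-in "vcc" is reached it holds vdst plus two modifier/register
      // pairs (5 operands); for VOPC the "vcc" dst is reached before any
      // operand has been added (0 operands).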
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to see a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ?
        Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }