1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
          SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
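    //
    // Illustrative sketch (not part of the parser; names are hypothetical):
    // an operand captured as an MCSymbolRefExpr still satisfies token
    // matching, because getToken() below falls back to the symbol name.
    //
    //   // Parsed from the text "gds"; Kind == Expression, Expr is a symbol ref.
    //   const AMDGPUOperand &Op = ...;
    //   if (Op.isToken() && Op.getToken() == "gds") {
    //     // Treated as the 'gds' keyword rather than as a symbol reference.
    //   }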
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
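    // Reader's note (informal summary of the pattern used by the predicates
    // in this file, not normative): isRegOrInlineNoMods(RCID, VT) accepts a
    // register of class RCID or an immediate inlinable as type VT, and in
    // both cases rejects operands carrying abs/neg/sext modifiers. The
    // predicate families build on it roughly as follows:
    //   isSCSrc*  - SReg_* classes or an inline constant
    //   isVCSrc*  - VS_* (VGPR or SGPR) classes or an inline constant
    //   isVSrc*   - isVCSrc*, additionally allowing a literal (and, for some
    //               32-bit forms, a relocatable expression)
    //   isVISrc*  - VGPR-only classes or an inline constant
    //   isAISrc*  - AGPR-only classes or an inline constant
    //   *V2*      - the corresponding packed 16-bit variants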
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
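    // Note (reader's observation): this switch appears to cover every ImmTy
    // enumerator and intentionally has no default case, so adding a new
    // ImmTy without updating this printer should be flagged by -Wswitch.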
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
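    // Note: HasExplicitEncodingSize is accepted but not consulted below; it
    // does not affect the created token operand. Illustrative call (variable
    // names are hypothetical):
    //   Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));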
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 
1227 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1228 const char *AssemblerDirectiveEnd, 1229 std::string &CollectString); 1230 1231 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1232 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1233 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1234 unsigned &RegNum, unsigned &RegWidth, 1235 bool RestoreOnFailure = false); 1236 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1237 unsigned &RegNum, unsigned &RegWidth, 1238 SmallVectorImpl<AsmToken> &Tokens); 1239 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1240 unsigned &RegWidth, 1241 SmallVectorImpl<AsmToken> &Tokens); 1242 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1243 unsigned &RegWidth, 1244 SmallVectorImpl<AsmToken> &Tokens); 1245 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1246 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1247 bool ParseRegRange(unsigned& Num, unsigned& Width); 1248 unsigned getRegularReg(RegisterKind RegKind, 1249 unsigned RegNum, 1250 unsigned RegWidth, 1251 SMLoc Loc); 1252 1253 bool isRegister(); 1254 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1255 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1256 void initializeGprCountSymbol(RegisterKind RegKind); 1257 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1258 unsigned RegWidth); 1259 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1260 bool IsAtomic, bool IsLds = false); 1261 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1262 bool IsGdsHardcoded); 1263 1264 public: 1265 enum AMDGPUMatchResultTy { 1266 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1267 }; 1268 enum OperandMode { 1269 OperandMode_Default, 1270 OperandMode_NSA, 1271 }; 1272 1273 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1274 1275 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1276 const MCInstrInfo &MII, 1277 const MCTargetOptions &Options) 1278 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1279 MCAsmParserExtension::Initialize(Parser); 1280 1281 if (getFeatureBits().none()) { 1282 // Set default features. 1283 copySTI().ToggleFeature("southern-islands"); 1284 } 1285 1286 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1287 1288 { 1289 // TODO: make those pre-defined variables read-only. 1290 // Currently there is none suitable machinery in the core llvm-mc for this. 1291 // MCSymbol::isRedefinable is intended for another purpose, and 1292 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1333 1334 bool isSI() const { 1335 return AMDGPU::isSI(getSTI()); 1336 } 1337 1338 bool isCI() const { 1339 return AMDGPU::isCI(getSTI()); 1340 } 1341 1342 bool isVI() const { 1343 return AMDGPU::isVI(getSTI()); 1344 } 1345 1346 bool isGFX9() const { 1347 return AMDGPU::isGFX9(getSTI()); 1348 } 1349 1350 bool isGFX90A() const { 1351 return AMDGPU::isGFX90A(getSTI()); 1352 } 1353 1354 bool isGFX9Plus() const { 1355 return AMDGPU::isGFX9Plus(getSTI()); 1356 } 1357 1358 bool isGFX10() const { 1359 return AMDGPU::isGFX10(getSTI()); 1360 } 1361 1362 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1363 1364 bool isGFX10_BEncoding() const { 1365 return AMDGPU::isGFX10_BEncoding(getSTI()); 1366 } 1367 1368 bool hasInv2PiInlineImm() const { 1369 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1370 } 1371 1372 bool hasFlatOffsets() const { 1373 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1374 } 1375 1376 bool hasArchitectedFlatScratch() const { 1377 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1378 } 1379 1380 bool hasSGPR102_SGPR103() const { 1381 return !isVI() && !isGFX9(); 1382 } 1383 1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1385 1386 bool hasIntClamp() const { 1387 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1388 } 1389 1390 AMDGPUTargetStreamer &getTargetStreamer() { 1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1392 return static_cast<AMDGPUTargetStreamer &>(TS); 1393 } 1394 1395 const MCRegisterInfo *getMRI() const { 1396 // We need this const_cast because for some reason getContext() is not const 1397 // in MCAsmParser. 
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1399 } 1400 1401 const MCInstrInfo *getMII() const { 1402 return &MII; 1403 } 1404 1405 const FeatureBitset &getFeatureBits() const { 1406 return getSTI().getFeatureBits(); 1407 } 1408 1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1412 1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1415 bool isForcedDPP() const { return ForcedDPP; } 1416 bool isForcedSDWA() const { return ForcedSDWA; } 1417 ArrayRef<unsigned> getMatchedVariants() const; 1418 StringRef getMatchedVariantName() const; 1419 1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1422 bool RestoreOnFailure); 1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1425 SMLoc &EndLoc) override; 1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1428 unsigned Kind) override; 1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1430 OperandVector &Operands, MCStreamer &Out, 1431 uint64_t &ErrorInfo, 1432 bool MatchingInlineAsm) override; 1433 bool ParseDirective(AsmToken DirectiveID) override; 1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1435 OperandMode Mode = OperandMode_Default); 1436 StringRef parseMnemonicSuffix(StringRef Name); 1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1438 SMLoc NameLoc, OperandVector &Operands) override; 1439 //bool ProcessInstruction(MCInst &Inst); 1440 1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1442 1443 OperandMatchResultTy 1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t &) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseOperandArrayWithPrefix(const char *Prefix, 1450 OperandVector &Operands, 1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1452 bool (*ConvertResult)(int64_t&) = nullptr); 1453 1454 OperandMatchResultTy 1455 parseNamedBit(StringRef Name, OperandVector &Operands, 1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1457 OperandMatchResultTy parseCPol(OperandVector &Operands); 1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1459 StringRef &Value, 1460 SMLoc &StringLoc); 1461 1462 bool isModifier(); 1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1467 bool parseSP3NegModifier(); 1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1469 OperandMatchResultTy parseReg(OperandVector &Operands); 1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1477 OperandMatchResultTy parseUfmt(int64_t &Format); 1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1482 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1485 1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1489 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1490 1491 bool parseCnt(int64_t &IntVal); 1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1493 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1494 1495 private: 1496 struct OperandInfoTy { 1497 SMLoc Loc; 1498 int64_t Id; 1499 bool IsSymbolic = false; 1500 bool IsDefined = false; 1501 1502 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1503 }; 1504 1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1506 bool validateSendMsg(const OperandInfoTy &Msg, 1507 const OperandInfoTy &Op, 1508 const OperandInfoTy &Stream); 1509 1510 bool parseHwregBody(OperandInfoTy &HwReg, 1511 OperandInfoTy &Offset, 1512 OperandInfoTy &Width); 1513 bool validateHwreg(const OperandInfoTy &HwReg, 1514 const OperandInfoTy &Offset, 1515 const OperandInfoTy &Width); 1516 1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1519 1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1521 const OperandVector &Operands) const; 1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1524 SMLoc getLitLoc(const OperandVector &Operands) const; 1525 SMLoc getConstLoc(const OperandVector &Operands) const; 1526 1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1530 bool validateSOPLiteral(const MCInst &Inst) const; 1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateIntClampSupported(const MCInst &Inst); 1534 bool validateMIMGAtomicDMask(const MCInst &Inst); 1535 bool validateMIMGGatherDMask(const MCInst &Inst); 1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateMIMGDataSize(const MCInst &Inst); 1538 bool validateMIMGAddrSize(const 
MCInst &Inst); 1539 bool validateMIMGD16(const MCInst &Inst); 1540 bool validateMIMGDim(const MCInst &Inst); 1541 bool validateMIMGMSAA(const MCInst &Inst); 1542 bool validateOpSel(const MCInst &Inst); 1543 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1544 bool validateVccOperand(unsigned Reg) const; 1545 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1546 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1547 bool validateAGPRLdSt(const MCInst &Inst) const; 1548 bool validateVGPRAlign(const MCInst &Inst) const; 1549 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateDivScale(const MCInst &Inst); 1551 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1552 const SMLoc &IDLoc); 1553 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1554 unsigned getConstantBusLimit(unsigned Opcode) const; 1555 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1556 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1557 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1558 1559 bool isSupportedMnemo(StringRef Mnemo, 1560 const FeatureBitset &FBS); 1561 bool isSupportedMnemo(StringRef Mnemo, 1562 const FeatureBitset &FBS, 1563 ArrayRef<unsigned> Variants); 1564 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1565 1566 bool isId(const StringRef Id) const; 1567 bool isId(const AsmToken &Token, const StringRef Id) const; 1568 bool isToken(const AsmToken::TokenKind Kind) const; 1569 bool trySkipId(const StringRef Id); 1570 bool trySkipId(const StringRef Pref, const StringRef Id); 1571 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1572 bool trySkipToken(const AsmToken::TokenKind Kind); 1573 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1574 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1575 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1576 1577 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1578 AsmToken::TokenKind getTokenKind() const; 1579 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1580 bool parseExpr(OperandVector &Operands); 1581 StringRef getTokenStr() const; 1582 AsmToken peekToken(); 1583 AsmToken getToken() const; 1584 SMLoc getLoc() const; 1585 void lex(); 1586 1587 public: 1588 void onBeginOfFile() override; 1589 1590 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1591 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1592 1593 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1594 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1595 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1596 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1597 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1598 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1599 1600 bool parseSwizzleOperand(int64_t &Op, 1601 const unsigned MinVal, 1602 const unsigned MaxVal, 1603 const StringRef ErrMsg, 1604 SMLoc &Loc); 1605 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1606 const unsigned MinVal, 1607 const unsigned MaxVal, 1608 const StringRef ErrMsg); 1609 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1610 bool parseSwizzleOffset(int64_t &Imm); 1611 bool parseSwizzleMacro(int64_t &Imm); 1612 bool parseSwizzleQuadPerm(int64_t &Imm); 1613 bool parseSwizzleBitmaskPerm(int64_t 
&Imm); 1614 bool parseSwizzleBroadcast(int64_t &Imm); 1615 bool parseSwizzleSwap(int64_t &Imm); 1616 bool parseSwizzleReverse(int64_t &Imm); 1617 1618 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1619 int64_t parseGPRIdxMacro(); 1620 1621 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1622 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1623 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1624 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1625 1626 AMDGPUOperand::Ptr defaultCPol() const; 1627 1628 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1629 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1630 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1631 AMDGPUOperand::Ptr defaultFlatOffset() const; 1632 1633 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1634 1635 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1636 OptionalImmIndexMap &OptionalIdx); 1637 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1638 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1639 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1640 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1641 OptionalImmIndexMap &OptionalIdx); 1642 1643 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1644 1645 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1646 bool IsAtomic = false); 1647 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1648 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1649 1650 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1651 1652 bool parseDimId(unsigned &Encoding); 1653 OperandMatchResultTy parseDim(OperandVector &Operands); 1654 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1655 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1656 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1657 int64_t parseDPPCtrlSel(StringRef Ctrl); 1658 int64_t parseDPPCtrlPerm(); 1659 AMDGPUOperand::Ptr defaultRowMask() const; 1660 AMDGPUOperand::Ptr defaultBankMask() const; 1661 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1662 AMDGPUOperand::Ptr defaultFI() const; 1663 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1664 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1665 1666 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1667 AMDGPUOperand::ImmTy Type); 1668 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1669 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1670 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1671 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1672 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1673 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1675 uint64_t BasicInstType, 1676 bool SkipDstVcc = false, 1677 bool SkipSrcVcc = false); 1678 1679 AMDGPUOperand::Ptr defaultBLGP() const; 1680 AMDGPUOperand::Ptr defaultCBSZ() const; 1681 AMDGPUOperand::Ptr defaultABID() const; 1682 1683 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1684 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1685 }; 1686 1687 struct OptionalOperand { 1688 
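  // Describes one optional/named operand accepted by the optional-operand
  // parsers declared above (summary for the reader, not normative):
  //   Name          - the keyword as written in assembly (e.g. "offset").
  //   Type          - the AMDGPUOperand::ImmTy the parsed value is tagged with.
  //   IsBit         - true for flag-style operands that take no value.
  //   ConvertResult - optional callback used to post-process the parsed value.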
const char *Name; 1689 AMDGPUOperand::ImmTy Type; 1690 bool IsBit; 1691 bool (*ConvertResult)(int64_t&); 1692 }; 1693 1694 } // end anonymous namespace 1695 1696 // May be called with integer type with equivalent bitwidth. 1697 static const fltSemantics *getFltSemantics(unsigned Size) { 1698 switch (Size) { 1699 case 4: 1700 return &APFloat::IEEEsingle(); 1701 case 8: 1702 return &APFloat::IEEEdouble(); 1703 case 2: 1704 return &APFloat::IEEEhalf(); 1705 default: 1706 llvm_unreachable("unsupported fp type"); 1707 } 1708 } 1709 1710 static const fltSemantics *getFltSemantics(MVT VT) { 1711 return getFltSemantics(VT.getSizeInBits() / 8); 1712 } 1713 1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1715 switch (OperandType) { 1716 case AMDGPU::OPERAND_REG_IMM_INT32: 1717 case AMDGPU::OPERAND_REG_IMM_FP32: 1718 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1719 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1720 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1722 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1723 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1725 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1726 return &APFloat::IEEEsingle(); 1727 case AMDGPU::OPERAND_REG_IMM_INT64: 1728 case AMDGPU::OPERAND_REG_IMM_FP64: 1729 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1730 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1732 return &APFloat::IEEEdouble(); 1733 case AMDGPU::OPERAND_REG_IMM_INT16: 1734 case AMDGPU::OPERAND_REG_IMM_FP16: 1735 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1736 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1737 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1738 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1739 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1740 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1741 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1742 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1743 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1744 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1745 return &APFloat::IEEEhalf(); 1746 default: 1747 llvm_unreachable("unsupported fp type"); 1748 } 1749 } 1750 1751 //===----------------------------------------------------------------------===// 1752 // Operand 1753 //===----------------------------------------------------------------------===// 1754 1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1756 bool Lost; 1757 1758 // Convert literal to single precision 1759 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1760 APFloat::rmNearestTiesToEven, 1761 &Lost); 1762 // We allow precision lost but not overflow or underflow 1763 if (Status != APFloat::opOK && 1764 Lost && 1765 ((Status & APFloat::opOverflow) != 0 || 1766 (Status & APFloat::opUnderflow) != 0)) { 1767 return false; 1768 } 1769 1770 return true; 1771 } 1772 1773 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1774 return isUIntN(Size, Val) || isIntN(Size, Val); 1775 } 1776 1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1778 if (VT.getScalarType() == MVT::i16) { 1779 // FP immediate values are broken. 1780 return isInlinableIntLiteral(Val); 1781 } 1782 1783 // f16/v2f16 operands work correctly for all values. 1784 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1785 } 1786 1787 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1788 1789 // This is a hack to enable named inline values like 1790 // shared_base with both 32-bit and 64-bit operands. 
1791 // Note that these values are defined as 1792 // 32-bit operands only. 1793 if (isInlineValue()) { 1794 return true; 1795 } 1796 1797 if (!isImmTy(ImmTyNone)) { 1798 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1799 return false; 1800 } 1801 // TODO: We should avoid using host float here. It would be better to 1802 // check the float bit values which is what a few other places do. 1803 // We've had bot failures before due to weird NaN support on mips hosts. 1804 1805 APInt Literal(64, Imm.Val); 1806 1807 if (Imm.IsFPImm) { // We got fp literal token 1808 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1809 return AMDGPU::isInlinableLiteral64(Imm.Val, 1810 AsmParser->hasInv2PiInlineImm()); 1811 } 1812 1813 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1814 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1815 return false; 1816 1817 if (type.getScalarSizeInBits() == 16) { 1818 return isInlineableLiteralOp16( 1819 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1820 type, AsmParser->hasInv2PiInlineImm()); 1821 } 1822 1823 // Check if single precision literal is inlinable 1824 return AMDGPU::isInlinableLiteral32( 1825 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1826 AsmParser->hasInv2PiInlineImm()); 1827 } 1828 1829 // We got int literal token. 1830 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1831 return AMDGPU::isInlinableLiteral64(Imm.Val, 1832 AsmParser->hasInv2PiInlineImm()); 1833 } 1834 1835 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1836 return false; 1837 } 1838 1839 if (type.getScalarSizeInBits() == 16) { 1840 return isInlineableLiteralOp16( 1841 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1842 type, AsmParser->hasInv2PiInlineImm()); 1843 } 1844 1845 return AMDGPU::isInlinableLiteral32( 1846 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1847 AsmParser->hasInv2PiInlineImm()); 1848 } 1849 1850 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1851 // Check that this immediate can be added as a literal 1852 if (!isImmTy(ImmTyNone)) { 1853 return false; 1854 } 1855 1856 if (!Imm.IsFPImm) { 1857 // We got int literal token. 1858 1859 if (type == MVT::f64 && hasFPModifiers()) { 1860 // Cannot apply fp modifiers to int literals preserving the same semantics 1861 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1862 // disable these cases. 1863 return false; 1864 } 1865 1866 unsigned Size = type.getSizeInBits(); 1867 if (Size == 64) 1868 Size = 32; 1869 1870 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1871 // types. 1872 return isSafeTruncation(Imm.Val, Size); 1873 } 1874 1875 // We got fp literal token 1876 if (type == MVT::f64) { // Expected 64-bit fp operand 1877 // We would set the low 32 bits of the literal to zeroes but we accept such literals 1878 return true; 1879 } 1880 1881 if (type == MVT::i64) { // Expected 64-bit int operand 1882 // We don't allow fp literals in 64-bit integer instructions. It is 1883 // unclear how we should encode them. 1884 return false; 1885 } 1886 1887 // We allow fp literals with f16x2 operands assuming that the specified 1888 // literal goes into the lower half and the upper half is zero. We also 1889 // require that the literal may be losslessly converted to f16. 1890 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1891 (type == MVT::v2i16)? MVT::i16 : 1892 (type == MVT::v2f32)?
MVT::f32 : type; 1893 1894 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1895 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1896 } 1897 1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1899 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1900 } 1901 1902 bool AMDGPUOperand::isVRegWithInputMods() const { 1903 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1904 // GFX90A allows DPP on 64-bit operands. 1905 (isRegClass(AMDGPU::VReg_64RegClassID) && 1906 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1907 } 1908 1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1910 if (AsmParser->isVI()) 1911 return isVReg32(); 1912 else if (AsmParser->isGFX9Plus()) 1913 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1914 else 1915 return false; 1916 } 1917 1918 bool AMDGPUOperand::isSDWAFP16Operand() const { 1919 return isSDWAOperand(MVT::f16); 1920 } 1921 1922 bool AMDGPUOperand::isSDWAFP32Operand() const { 1923 return isSDWAOperand(MVT::f32); 1924 } 1925 1926 bool AMDGPUOperand::isSDWAInt16Operand() const { 1927 return isSDWAOperand(MVT::i16); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAInt32Operand() const { 1931 return isSDWAOperand(MVT::i32); 1932 } 1933 1934 bool AMDGPUOperand::isBoolReg() const { 1935 auto FB = AsmParser->getFeatureBits(); 1936 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1937 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1938 } 1939 1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1941 { 1942 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1943 assert(Size == 2 || Size == 4 || Size == 8); 1944 1945 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1946 1947 if (Imm.Mods.Abs) { 1948 Val &= ~FpSignMask; 1949 } 1950 if (Imm.Mods.Neg) { 1951 Val ^= FpSignMask; 1952 } 1953 1954 return Val; 1955 } 1956 1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1958 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1959 Inst.getNumOperands())) { 1960 addLiteralImmOperand(Inst, Imm.Val, 1961 ApplyModifiers & 1962 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1963 } else { 1964 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1965 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1966 setImmKindNone(); 1967 } 1968 } 1969 1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1971 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1972 auto OpNum = Inst.getNumOperands(); 1973 // Check that this operand accepts literals 1974 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1975 1976 if (ApplyModifiers) { 1977 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1978 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1979 Val = applyInputFPModifiers(Val, Size); 1980 } 1981 1982 APInt Literal(64, Val); 1983 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1984 1985 if (Imm.IsFPImm) { // We got fp literal token 1986 switch (OpTy) { 1987 case AMDGPU::OPERAND_REG_IMM_INT64: 1988 case AMDGPU::OPERAND_REG_IMM_FP64: 1989 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1990 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1991 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1992 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1993 AsmParser->hasInv2PiInlineImm())) { 1994 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1995 setImmKindConst(); 1996 return; 1997 } 1998 1999 // Non-inlineable 2000 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2001 // For fp operands we check if low 32 bits are zeros 2002 if (Literal.getLoBits(32) != 0) { 2003 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2004 "Can't encode literal as exact 64-bit floating-point operand. " 2005 "Low 32-bits will be set to zero"); 2006 } 2007 2008 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2009 setImmKindLiteral(); 2010 return; 2011 } 2012 2013 // We don't allow fp literals in 64-bit integer instructions. It is 2014 // unclear how we should encode them. This case should be checked earlier 2015 // in predicate methods (isLiteralImm()) 2016 llvm_unreachable("fp literal in 64-bit integer instruction."); 2017 2018 case AMDGPU::OPERAND_REG_IMM_INT32: 2019 case AMDGPU::OPERAND_REG_IMM_FP32: 2020 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2021 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2024 case AMDGPU::OPERAND_REG_IMM_INT16: 2025 case AMDGPU::OPERAND_REG_IMM_FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2027 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2028 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2029 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2030 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2031 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2033 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2034 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2035 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2036 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2037 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2038 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2039 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2040 bool lost; 2041 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2042 // Convert literal to single precision 2043 FPLiteral.convert(*getOpFltSemantics(OpTy), 2044 APFloat::rmNearestTiesToEven, &lost); 2045 // We allow precision lost but not overflow or underflow. This should be 2046 // checked earlier in isLiteralImm() 2047 2048 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2049 Inst.addOperand(MCOperand::createImm(ImmVal)); 2050 setImmKindLiteral(); 2051 return; 2052 } 2053 default: 2054 llvm_unreachable("invalid operand size"); 2055 } 2056 2057 return; 2058 } 2059 2060 // We got int literal token. 2061 // Only sign extend inline immediates. 
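  // That is, an inline immediate keeps its sign-extended value, while a
  // non-inlinable literal is truncated before being encoded (e.g. masked
  // with 0xffff for 16-bit operands below).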
2062 switch (OpTy) { 2063 case AMDGPU::OPERAND_REG_IMM_INT32: 2064 case AMDGPU::OPERAND_REG_IMM_FP32: 2065 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2070 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2071 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2072 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2073 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2074 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2075 if (isSafeTruncation(Val, 32) && 2076 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2077 AsmParser->hasInv2PiInlineImm())) { 2078 Inst.addOperand(MCOperand::createImm(Val)); 2079 setImmKindConst(); 2080 return; 2081 } 2082 2083 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2084 setImmKindLiteral(); 2085 return; 2086 2087 case AMDGPU::OPERAND_REG_IMM_INT64: 2088 case AMDGPU::OPERAND_REG_IMM_FP64: 2089 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2090 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2091 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2092 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2093 Inst.addOperand(MCOperand::createImm(Val)); 2094 setImmKindConst(); 2095 return; 2096 } 2097 2098 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2099 setImmKindLiteral(); 2100 return; 2101 2102 case AMDGPU::OPERAND_REG_IMM_INT16: 2103 case AMDGPU::OPERAND_REG_IMM_FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2105 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2106 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2107 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2108 if (isSafeTruncation(Val, 16) && 2109 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2110 AsmParser->hasInv2PiInlineImm())) { 2111 Inst.addOperand(MCOperand::createImm(Val)); 2112 setImmKindConst(); 2113 return; 2114 } 2115 2116 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2117 setImmKindLiteral(); 2118 return; 2119 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2124 assert(isSafeTruncation(Val, 16)); 2125 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2126 AsmParser->hasInv2PiInlineImm())); 2127 2128 Inst.addOperand(MCOperand::createImm(Val)); 2129 return; 2130 } 2131 default: 2132 llvm_unreachable("invalid operand size"); 2133 } 2134 } 2135 2136 template <unsigned Bitwidth> 2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2138 APInt Literal(64, Imm.Val); 2139 setImmKindNone(); 2140 2141 if (!Imm.IsFPImm) { 2142 // We got int literal token. 
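    // KIMM operands always carry a literal constant (there is no inline
    // form), so just encode the low Bitwidth bits of the integer.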
2143 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2144 return; 2145 } 2146 2147 bool Lost; 2148 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2149 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2150 APFloat::rmNearestTiesToEven, &Lost); 2151 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2152 } 2153 2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2155 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2156 } 2157 2158 static bool isInlineValue(unsigned Reg) { 2159 switch (Reg) { 2160 case AMDGPU::SRC_SHARED_BASE: 2161 case AMDGPU::SRC_SHARED_LIMIT: 2162 case AMDGPU::SRC_PRIVATE_BASE: 2163 case AMDGPU::SRC_PRIVATE_LIMIT: 2164 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2165 return true; 2166 case AMDGPU::SRC_VCCZ: 2167 case AMDGPU::SRC_EXECZ: 2168 case AMDGPU::SRC_SCC: 2169 return true; 2170 case AMDGPU::SGPR_NULL: 2171 return true; 2172 default: 2173 return false; 2174 } 2175 } 2176 2177 bool AMDGPUOperand::isInlineValue() const { 2178 return isRegKind() && ::isInlineValue(getReg()); 2179 } 2180 2181 //===----------------------------------------------------------------------===// 2182 // AsmParser 2183 //===----------------------------------------------------------------------===// 2184 2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2186 if (Is == IS_VGPR) { 2187 switch (RegWidth) { 2188 default: return -1; 2189 case 1: return AMDGPU::VGPR_32RegClassID; 2190 case 2: return AMDGPU::VReg_64RegClassID; 2191 case 3: return AMDGPU::VReg_96RegClassID; 2192 case 4: return AMDGPU::VReg_128RegClassID; 2193 case 5: return AMDGPU::VReg_160RegClassID; 2194 case 6: return AMDGPU::VReg_192RegClassID; 2195 case 8: return AMDGPU::VReg_256RegClassID; 2196 case 16: return AMDGPU::VReg_512RegClassID; 2197 case 32: return AMDGPU::VReg_1024RegClassID; 2198 } 2199 } else if (Is == IS_TTMP) { 2200 switch (RegWidth) { 2201 default: return -1; 2202 case 1: return AMDGPU::TTMP_32RegClassID; 2203 case 2: return AMDGPU::TTMP_64RegClassID; 2204 case 4: return AMDGPU::TTMP_128RegClassID; 2205 case 8: return AMDGPU::TTMP_256RegClassID; 2206 case 16: return AMDGPU::TTMP_512RegClassID; 2207 } 2208 } else if (Is == IS_SGPR) { 2209 switch (RegWidth) { 2210 default: return -1; 2211 case 1: return AMDGPU::SGPR_32RegClassID; 2212 case 2: return AMDGPU::SGPR_64RegClassID; 2213 case 3: return AMDGPU::SGPR_96RegClassID; 2214 case 4: return AMDGPU::SGPR_128RegClassID; 2215 case 5: return AMDGPU::SGPR_160RegClassID; 2216 case 6: return AMDGPU::SGPR_192RegClassID; 2217 case 8: return AMDGPU::SGPR_256RegClassID; 2218 case 16: return AMDGPU::SGPR_512RegClassID; 2219 } 2220 } else if (Is == IS_AGPR) { 2221 switch (RegWidth) { 2222 default: return -1; 2223 case 1: return AMDGPU::AGPR_32RegClassID; 2224 case 2: return AMDGPU::AReg_64RegClassID; 2225 case 3: return AMDGPU::AReg_96RegClassID; 2226 case 4: return AMDGPU::AReg_128RegClassID; 2227 case 5: return AMDGPU::AReg_160RegClassID; 2228 case 6: return AMDGPU::AReg_192RegClassID; 2229 case 8: return AMDGPU::AReg_256RegClassID; 2230 case 16: return AMDGPU::AReg_512RegClassID; 2231 case 32: return AMDGPU::AReg_1024RegClassID; 2232 } 2233 } 2234 return -1; 2235 } 2236 2237 static unsigned getSpecialRegForName(StringRef RegName) { 2238 return StringSwitch<unsigned>(RegName) 2239 .Case("exec", AMDGPU::EXEC) 2240 .Case("vcc", AMDGPU::VCC) 2241 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2242 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2243 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2244 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2245 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2246 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2247 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2248 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2249 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2250 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2251 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2252 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2253 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2254 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2255 .Case("m0", AMDGPU::M0) 2256 .Case("vccz", AMDGPU::SRC_VCCZ) 2257 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2258 .Case("execz", AMDGPU::SRC_EXECZ) 2259 .Case("src_execz", AMDGPU::SRC_EXECZ) 2260 .Case("scc", AMDGPU::SRC_SCC) 2261 .Case("src_scc", AMDGPU::SRC_SCC) 2262 .Case("tba", AMDGPU::TBA) 2263 .Case("tma", AMDGPU::TMA) 2264 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2265 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2266 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2267 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2268 .Case("vcc_lo", AMDGPU::VCC_LO) 2269 .Case("vcc_hi", AMDGPU::VCC_HI) 2270 .Case("exec_lo", AMDGPU::EXEC_LO) 2271 .Case("exec_hi", AMDGPU::EXEC_HI) 2272 .Case("tma_lo", AMDGPU::TMA_LO) 2273 .Case("tma_hi", AMDGPU::TMA_HI) 2274 .Case("tba_lo", AMDGPU::TBA_LO) 2275 .Case("tba_hi", AMDGPU::TBA_HI) 2276 .Case("pc", AMDGPU::PC_REG) 2277 .Case("null", AMDGPU::SGPR_NULL) 2278 .Default(AMDGPU::NoRegister); 2279 } 2280 2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2282 SMLoc &EndLoc, bool RestoreOnFailure) { 2283 auto R = parseRegister(); 2284 if (!R) return true; 2285 assert(R->isReg()); 2286 RegNo = R->getReg(); 2287 StartLoc = R->getStartLoc(); 2288 EndLoc = R->getEndLoc(); 2289 return false; 2290 } 2291 2292 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2293 SMLoc &EndLoc) { 2294 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2295 } 2296 2297 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2298 SMLoc &StartLoc, 2299 SMLoc &EndLoc) { 2300 bool Result = 2301 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2302 bool PendingErrors = getParser().hasPendingError(); 2303 getParser().clearPendingErrors(); 2304 if (PendingErrors) 2305 return MatchOperand_ParseFail; 2306 if (Result) 2307 return MatchOperand_NoMatch; 2308 return MatchOperand_Success; 2309 } 2310 2311 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2312 RegisterKind RegKind, unsigned Reg1, 2313 SMLoc Loc) { 2314 switch (RegKind) { 2315 case IS_SPECIAL: 2316 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2317 Reg = AMDGPU::EXEC; 2318 RegWidth = 2; 2319 return true; 2320 } 2321 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2322 Reg = AMDGPU::FLAT_SCR; 2323 RegWidth = 2; 2324 return true; 2325 } 2326 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2327 Reg = AMDGPU::XNACK_MASK; 2328 RegWidth = 2; 2329 return true; 2330 } 2331 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2332 Reg = AMDGPU::VCC; 2333 RegWidth = 2; 2334 return true; 2335 } 2336 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2337 Reg = AMDGPU::TBA; 2338 RegWidth = 2; 2339 return true; 2340 } 2341 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2342 Reg = AMDGPU::TMA; 2343 
RegWidth = 2; 2344 return true; 2345 } 2346 Error(Loc, "register does not fit in the list"); 2347 return false; 2348 case IS_VGPR: 2349 case IS_SGPR: 2350 case IS_AGPR: 2351 case IS_TTMP: 2352 if (Reg1 != Reg + RegWidth) { 2353 Error(Loc, "registers in a list must have consecutive indices"); 2354 return false; 2355 } 2356 RegWidth++; 2357 return true; 2358 default: 2359 llvm_unreachable("unexpected register kind"); 2360 } 2361 } 2362 2363 struct RegInfo { 2364 StringLiteral Name; 2365 RegisterKind Kind; 2366 }; 2367 2368 static constexpr RegInfo RegularRegisters[] = { 2369 {{"v"}, IS_VGPR}, 2370 {{"s"}, IS_SGPR}, 2371 {{"ttmp"}, IS_TTMP}, 2372 {{"acc"}, IS_AGPR}, 2373 {{"a"}, IS_AGPR}, 2374 }; 2375 2376 static bool isRegularReg(RegisterKind Kind) { 2377 return Kind == IS_VGPR || 2378 Kind == IS_SGPR || 2379 Kind == IS_TTMP || 2380 Kind == IS_AGPR; 2381 } 2382 2383 static const RegInfo* getRegularRegInfo(StringRef Str) { 2384 for (const RegInfo &Reg : RegularRegisters) 2385 if (Str.startswith(Reg.Name)) 2386 return &Reg; 2387 return nullptr; 2388 } 2389 2390 static bool getRegNum(StringRef Str, unsigned& Num) { 2391 return !Str.getAsInteger(10, Num); 2392 } 2393 2394 bool 2395 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2396 const AsmToken &NextToken) const { 2397 2398 // A list of consecutive registers: [s0,s1,s2,s3] 2399 if (Token.is(AsmToken::LBrac)) 2400 return true; 2401 2402 if (!Token.is(AsmToken::Identifier)) 2403 return false; 2404 2405 // A single register like s0 or a range of registers like s[0:1] 2406 2407 StringRef Str = Token.getString(); 2408 const RegInfo *Reg = getRegularRegInfo(Str); 2409 if (Reg) { 2410 StringRef RegName = Reg->Name; 2411 StringRef RegSuffix = Str.substr(RegName.size()); 2412 if (!RegSuffix.empty()) { 2413 unsigned Num; 2414 // A single register with an index: rXX 2415 if (getRegNum(RegSuffix, Num)) 2416 return true; 2417 } else { 2418 // A range of registers: r[XX:YY]. 2419 if (NextToken.is(AsmToken::LBrac)) 2420 return true; 2421 } 2422 } 2423 2424 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2425 } 2426 2427 bool 2428 AMDGPUAsmParser::isRegister() 2429 { 2430 return isRegister(getToken(), peekToken()); 2431 } 2432 2433 unsigned 2434 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2435 unsigned RegNum, 2436 unsigned RegWidth, 2437 SMLoc Loc) { 2438 2439 assert(isRegularReg(RegKind)); 2440 2441 unsigned AlignSize = 1; 2442 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2443 // SGPR and TTMP registers must be aligned. 2444 // Max required alignment is 4 dwords. 
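    // For example, s[2:3] and s[4:7] are accepted, while s[1:2] and s[2:5]
    // fail the alignment check below.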
2445 AlignSize = std::min(RegWidth, 4u); 2446 } 2447 2448 if (RegNum % AlignSize != 0) { 2449 Error(Loc, "invalid register alignment"); 2450 return AMDGPU::NoRegister; 2451 } 2452 2453 unsigned RegIdx = RegNum / AlignSize; 2454 int RCID = getRegClass(RegKind, RegWidth); 2455 if (RCID == -1) { 2456 Error(Loc, "invalid or unsupported register size"); 2457 return AMDGPU::NoRegister; 2458 } 2459 2460 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2461 const MCRegisterClass RC = TRI->getRegClass(RCID); 2462 if (RegIdx >= RC.getNumRegs()) { 2463 Error(Loc, "register index is out of range"); 2464 return AMDGPU::NoRegister; 2465 } 2466 2467 return RC.getRegister(RegIdx); 2468 } 2469 2470 bool 2471 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2472 int64_t RegLo, RegHi; 2473 if (!skipToken(AsmToken::LBrac, "missing register index")) 2474 return false; 2475 2476 SMLoc FirstIdxLoc = getLoc(); 2477 SMLoc SecondIdxLoc; 2478 2479 if (!parseExpr(RegLo)) 2480 return false; 2481 2482 if (trySkipToken(AsmToken::Colon)) { 2483 SecondIdxLoc = getLoc(); 2484 if (!parseExpr(RegHi)) 2485 return false; 2486 } else { 2487 RegHi = RegLo; 2488 } 2489 2490 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2491 return false; 2492 2493 if (!isUInt<32>(RegLo)) { 2494 Error(FirstIdxLoc, "invalid register index"); 2495 return false; 2496 } 2497 2498 if (!isUInt<32>(RegHi)) { 2499 Error(SecondIdxLoc, "invalid register index"); 2500 return false; 2501 } 2502 2503 if (RegLo > RegHi) { 2504 Error(FirstIdxLoc, "first register index should not exceed second index"); 2505 return false; 2506 } 2507 2508 Num = static_cast<unsigned>(RegLo); 2509 Width = (RegHi - RegLo) + 1; 2510 return true; 2511 } 2512 2513 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2514 unsigned &RegNum, unsigned &RegWidth, 2515 SmallVectorImpl<AsmToken> &Tokens) { 2516 assert(isToken(AsmToken::Identifier)); 2517 unsigned Reg = getSpecialRegForName(getTokenStr()); 2518 if (Reg) { 2519 RegNum = 0; 2520 RegWidth = 1; 2521 RegKind = IS_SPECIAL; 2522 Tokens.push_back(getToken()); 2523 lex(); // skip register name 2524 } 2525 return Reg; 2526 } 2527 2528 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2529 unsigned &RegNum, unsigned &RegWidth, 2530 SmallVectorImpl<AsmToken> &Tokens) { 2531 assert(isToken(AsmToken::Identifier)); 2532 StringRef RegName = getTokenStr(); 2533 auto Loc = getLoc(); 2534 2535 const RegInfo *RI = getRegularRegInfo(RegName); 2536 if (!RI) { 2537 Error(Loc, "invalid register name"); 2538 return AMDGPU::NoRegister; 2539 } 2540 2541 Tokens.push_back(getToken()); 2542 lex(); // skip register name 2543 2544 RegKind = RI->Kind; 2545 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2546 if (!RegSuffix.empty()) { 2547 // Single 32-bit register: vXX. 2548 if (!getRegNum(RegSuffix, RegNum)) { 2549 Error(Loc, "invalid register index"); 2550 return AMDGPU::NoRegister; 2551 } 2552 RegWidth = 1; 2553 } else { 2554 // Range of registers: v[XX:YY]. ":YY" is optional. 
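    // For example, v[8:11] yields RegNum = 8 and RegWidth = 4, while v[8]
    // is a single 32-bit register.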
2555 if (!ParseRegRange(RegNum, RegWidth)) 2556 return AMDGPU::NoRegister; 2557 } 2558 2559 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2560 } 2561 2562 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2563 unsigned &RegWidth, 2564 SmallVectorImpl<AsmToken> &Tokens) { 2565 unsigned Reg = AMDGPU::NoRegister; 2566 auto ListLoc = getLoc(); 2567 2568 if (!skipToken(AsmToken::LBrac, 2569 "expected a register or a list of registers")) { 2570 return AMDGPU::NoRegister; 2571 } 2572 2573 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2574 2575 auto Loc = getLoc(); 2576 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2577 return AMDGPU::NoRegister; 2578 if (RegWidth != 1) { 2579 Error(Loc, "expected a single 32-bit register"); 2580 return AMDGPU::NoRegister; 2581 } 2582 2583 for (; trySkipToken(AsmToken::Comma); ) { 2584 RegisterKind NextRegKind; 2585 unsigned NextReg, NextRegNum, NextRegWidth; 2586 Loc = getLoc(); 2587 2588 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2589 NextRegNum, NextRegWidth, 2590 Tokens)) { 2591 return AMDGPU::NoRegister; 2592 } 2593 if (NextRegWidth != 1) { 2594 Error(Loc, "expected a single 32-bit register"); 2595 return AMDGPU::NoRegister; 2596 } 2597 if (NextRegKind != RegKind) { 2598 Error(Loc, "registers in a list must be of the same kind"); 2599 return AMDGPU::NoRegister; 2600 } 2601 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2602 return AMDGPU::NoRegister; 2603 } 2604 2605 if (!skipToken(AsmToken::RBrac, 2606 "expected a comma or a closing square bracket")) { 2607 return AMDGPU::NoRegister; 2608 } 2609 2610 if (isRegularReg(RegKind)) 2611 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2612 2613 return Reg; 2614 } 2615 2616 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2617 unsigned &RegNum, unsigned &RegWidth, 2618 SmallVectorImpl<AsmToken> &Tokens) { 2619 auto Loc = getLoc(); 2620 Reg = AMDGPU::NoRegister; 2621 2622 if (isToken(AsmToken::Identifier)) { 2623 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2624 if (Reg == AMDGPU::NoRegister) 2625 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2626 } else { 2627 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2628 } 2629 2630 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2631 if (Reg == AMDGPU::NoRegister) { 2632 assert(Parser.hasPendingError()); 2633 return false; 2634 } 2635 2636 if (!subtargetHasRegister(*TRI, Reg)) { 2637 if (Reg == AMDGPU::SGPR_NULL) { 2638 Error(Loc, "'null' operand is not supported on this GPU"); 2639 } else { 2640 Error(Loc, "register not available on this GPU"); 2641 } 2642 return false; 2643 } 2644 2645 return true; 2646 } 2647 2648 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2649 unsigned &RegNum, unsigned &RegWidth, 2650 bool RestoreOnFailure /*=false*/) { 2651 Reg = AMDGPU::NoRegister; 2652 2653 SmallVector<AsmToken, 1> Tokens; 2654 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2655 if (RestoreOnFailure) { 2656 while (!Tokens.empty()) { 2657 getLexer().UnLex(Tokens.pop_back_val()); 2658 } 2659 } 2660 return true; 2661 } 2662 return false; 2663 } 2664 2665 Optional<StringRef> 2666 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2667 switch (RegKind) { 2668 case IS_VGPR: 2669 return StringRef(".amdgcn.next_free_vgpr"); 2670 case IS_SGPR: 2671 return StringRef(".amdgcn.next_free_sgpr"); 2672 default: 2673 return None; 2674 } 2675 } 2676 2677 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2678 auto SymbolName = getGprCountSymbolName(RegKind); 2679 assert(SymbolName && "initializing invalid register kind"); 2680 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2681 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2682 } 2683 2684 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2685 unsigned DwordRegIndex, 2686 unsigned RegWidth) { 2687 // Symbols are only defined for GCN targets 2688 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2689 return true; 2690 2691 auto SymbolName = getGprCountSymbolName(RegKind); 2692 if (!SymbolName) 2693 return true; 2694 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2695 2696 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2697 int64_t OldCount; 2698 2699 if (!Sym->isVariable()) 2700 return !Error(getLoc(), 2701 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2702 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2703 return !Error( 2704 getLoc(), 2705 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2706 2707 if (OldCount <= NewMax) 2708 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2709 2710 return true; 2711 } 2712 2713 std::unique_ptr<AMDGPUOperand> 2714 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2715 const auto &Tok = getToken(); 2716 SMLoc StartLoc = Tok.getLoc(); 2717 SMLoc EndLoc = Tok.getEndLoc(); 2718 RegisterKind RegKind; 2719 unsigned Reg, RegNum, RegWidth; 2720 2721 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2722 return nullptr; 2723 } 2724 if (isHsaAbiVersion3Or4(&getSTI())) { 2725 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2726 return nullptr; 2727 } else 2728 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2729 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2730 } 2731 2732 OperandMatchResultTy 2733 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2734 // TODO: add syntactic sugar for 1/(2*PI) 2735 2736 assert(!isRegister()); 2737 assert(!isModifier()); 2738 2739 const auto& Tok = getToken(); 2740 const auto& NextTok = peekToken(); 2741 bool IsReal = Tok.is(AsmToken::Real); 2742 SMLoc S = getLoc(); 2743 bool Negate = false; 2744 2745 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2746 lex(); 2747 IsReal = true; 2748 Negate = true; 2749 } 2750 2751 if (IsReal) { 2752 // Floating-point expressions are not supported. 2753 // Can only allow floating-point literals with an 2754 // optional sign. 2755 2756 StringRef Num = getTokenStr(); 2757 lex(); 2758 2759 APFloat RealVal(APFloat::IEEEdouble()); 2760 auto roundMode = APFloat::rmNearestTiesToEven; 2761 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2762 return MatchOperand_ParseFail; 2763 } 2764 if (Negate) 2765 RealVal.changeSign(); 2766 2767 Operands.push_back( 2768 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2769 AMDGPUOperand::ImmTyNone, true)); 2770 2771 return MatchOperand_Success; 2772 2773 } else { 2774 int64_t IntVal; 2775 const MCExpr *Expr; 2776 SMLoc S = getLoc(); 2777 2778 if (HasSP3AbsModifier) { 2779 // This is a workaround for handling expressions 2780 // as arguments of SP3 'abs' modifier, for example: 2781 // |1.0| 2782 // |-1| 2783 // |1+x| 2784 // This syntax is not compatible with syntax of standard 2785 // MC expressions (due to the trailing '|'). 
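      // Parse only a primary expression here so that the trailing '|' is
      // left in the token stream for the caller to consume.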
2786 SMLoc EndLoc; 2787 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2788 return MatchOperand_ParseFail; 2789 } else { 2790 if (Parser.parseExpression(Expr)) 2791 return MatchOperand_ParseFail; 2792 } 2793 2794 if (Expr->evaluateAsAbsolute(IntVal)) { 2795 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2796 } else { 2797 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2798 } 2799 2800 return MatchOperand_Success; 2801 } 2802 2803 return MatchOperand_NoMatch; 2804 } 2805 2806 OperandMatchResultTy 2807 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2808 if (!isRegister()) 2809 return MatchOperand_NoMatch; 2810 2811 if (auto R = parseRegister()) { 2812 assert(R->isReg()); 2813 Operands.push_back(std::move(R)); 2814 return MatchOperand_Success; 2815 } 2816 return MatchOperand_ParseFail; 2817 } 2818 2819 OperandMatchResultTy 2820 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2821 auto res = parseReg(Operands); 2822 if (res != MatchOperand_NoMatch) { 2823 return res; 2824 } else if (isModifier()) { 2825 return MatchOperand_NoMatch; 2826 } else { 2827 return parseImm(Operands, HasSP3AbsMod); 2828 } 2829 } 2830 2831 bool 2832 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2833 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2834 const auto &str = Token.getString(); 2835 return str == "abs" || str == "neg" || str == "sext"; 2836 } 2837 return false; 2838 } 2839 2840 bool 2841 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2842 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2843 } 2844 2845 bool 2846 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2847 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2848 } 2849 2850 bool 2851 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2852 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2853 } 2854 2855 // Check if this is an operand modifier or an opcode modifier 2856 // which may look like an expression but it is not. We should 2857 // avoid parsing these modifiers as expressions. Currently 2858 // recognized sequences are: 2859 // |...| 2860 // abs(...) 2861 // neg(...) 2862 // sext(...) 2863 // -reg 2864 // -|...| 2865 // -abs(...) 2866 // name:... 2867 // Note that simple opcode modifiers like 'gds' may be parsed as 2868 // expressions; this is a special case. See getExpressionAsToken. 2869 // 2870 bool 2871 AMDGPUAsmParser::isModifier() { 2872 2873 AsmToken Tok = getToken(); 2874 AsmToken NextToken[2]; 2875 peekTokens(NextToken); 2876 2877 return isOperandModifier(Tok, NextToken[0]) || 2878 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2879 isOpcodeModifierWithVal(Tok, NextToken[0]); 2880 } 2881 2882 // Check if the current token is an SP3 'neg' modifier. 2883 // Currently this modifier is allowed in the following context: 2884 // 2885 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2886 // 2. Before an 'abs' modifier: -abs(...) 2887 // 3. Before an SP3 'abs' modifier: -|...| 2888 // 2889 // In all other cases "-" is handled as a part 2890 // of an expression that follows the sign. 
2891 // 2892 // Note: When "-" is followed by an integer literal, 2893 // this is interpreted as integer negation rather 2894 // than a floating-point NEG modifier applied to N. 2895 // Besides being counter-intuitive, such use of a floating-point 2896 // NEG modifier would have resulted in a different meaning 2897 // of integer literals used with VOP1/2/C and VOP3, 2898 // for example: 2899 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2900 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2901 // Negative fp literals with preceding "-" are 2902 // handled likewise for uniformity. 2903 // 2904 bool 2905 AMDGPUAsmParser::parseSP3NegModifier() { 2906 2907 AsmToken NextToken[2]; 2908 peekTokens(NextToken); 2909 2910 if (isToken(AsmToken::Minus) && 2911 (isRegister(NextToken[0], NextToken[1]) || 2912 NextToken[0].is(AsmToken::Pipe) || 2913 isId(NextToken[0], "abs"))) { 2914 lex(); 2915 return true; 2916 } 2917 2918 return false; 2919 } 2920 2921 OperandMatchResultTy 2922 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2923 bool AllowImm) { 2924 bool Neg, SP3Neg; 2925 bool Abs, SP3Abs; 2926 SMLoc Loc; 2927 2928 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2929 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2930 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2931 return MatchOperand_ParseFail; 2932 } 2933 2934 SP3Neg = parseSP3NegModifier(); 2935 2936 Loc = getLoc(); 2937 Neg = trySkipId("neg"); 2938 if (Neg && SP3Neg) { 2939 Error(Loc, "expected register or immediate"); 2940 return MatchOperand_ParseFail; 2941 } 2942 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2943 return MatchOperand_ParseFail; 2944 2945 Abs = trySkipId("abs"); 2946 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2947 return MatchOperand_ParseFail; 2948 2949 Loc = getLoc(); 2950 SP3Abs = trySkipToken(AsmToken::Pipe); 2951 if (Abs && SP3Abs) { 2952 Error(Loc, "expected register or immediate"); 2953 return MatchOperand_ParseFail; 2954 } 2955 2956 OperandMatchResultTy Res; 2957 if (AllowImm) { 2958 Res = parseRegOrImm(Operands, SP3Abs); 2959 } else { 2960 Res = parseReg(Operands); 2961 } 2962 if (Res != MatchOperand_Success) { 2963 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2964 } 2965 2966 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2967 return MatchOperand_ParseFail; 2968 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2969 return MatchOperand_ParseFail; 2970 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2971 return MatchOperand_ParseFail; 2972 2973 AMDGPUOperand::Modifiers Mods; 2974 Mods.Abs = Abs || SP3Abs; 2975 Mods.Neg = Neg || SP3Neg; 2976 2977 if (Mods.hasFPModifiers()) { 2978 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2979 if (Op.isExpr()) { 2980 Error(Op.getStartLoc(), "expected an absolute expression"); 2981 return MatchOperand_ParseFail; 2982 } 2983 Op.setModifiers(Mods); 2984 } 2985 return MatchOperand_Success; 2986 } 2987 2988 OperandMatchResultTy 2989 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2990 bool AllowImm) { 2991 bool Sext = trySkipId("sext"); 2992 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2993 return MatchOperand_ParseFail; 2994 2995 OperandMatchResultTy Res; 2996 if (AllowImm) { 2997 Res = parseRegOrImm(Operands); 2998 } else { 2999 Res = parseReg(Operands); 3000 } 3001 if (Res != MatchOperand_Success) { 3002 return Sext? MatchOperand_ParseFail : Res; 3003 } 3004 3005 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3006 return MatchOperand_ParseFail; 3007 3008 AMDGPUOperand::Modifiers Mods; 3009 Mods.Sext = Sext; 3010 3011 if (Mods.hasIntModifiers()) { 3012 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3013 if (Op.isExpr()) { 3014 Error(Op.getStartLoc(), "expected an absolute expression"); 3015 return MatchOperand_ParseFail; 3016 } 3017 Op.setModifiers(Mods); 3018 } 3019 3020 return MatchOperand_Success; 3021 } 3022 3023 OperandMatchResultTy 3024 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3025 return parseRegOrImmWithFPInputMods(Operands, false); 3026 } 3027 3028 OperandMatchResultTy 3029 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3030 return parseRegOrImmWithIntInputMods(Operands, false); 3031 } 3032 3033 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3034 auto Loc = getLoc(); 3035 if (trySkipId("off")) { 3036 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3037 AMDGPUOperand::ImmTyOff, false)); 3038 return MatchOperand_Success; 3039 } 3040 3041 if (!isRegister()) 3042 return MatchOperand_NoMatch; 3043 3044 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3045 if (Reg) { 3046 Operands.push_back(std::move(Reg)); 3047 return MatchOperand_Success; 3048 } 3049 3050 return MatchOperand_ParseFail; 3051 3052 } 3053 3054 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3055 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3056 3057 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3058 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3059 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3060 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3061 return Match_InvalidOperand; 3062 3063 if ((TSFlags & SIInstrFlags::VOP3) && 3064 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3065 getForcedEncodingSize() != 64) 3066 return Match_PreferE32; 3067 3068 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3069 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3070 // v_mac_f32/16 allow only dst_sel == DWORD; 3071 auto OpNum = 3072 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3073 const auto &Op = Inst.getOperand(OpNum); 3074 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3075 return Match_InvalidOperand; 3076 } 3077 } 3078 3079 return Match_Success; 3080 } 3081 3082 static ArrayRef<unsigned> getAllVariants() { 3083 static const unsigned Variants[] = { 3084 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3085 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3086 }; 3087 3088 return makeArrayRef(Variants); 3089 } 3090 3091 // What asm variants we should check 3092 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3093 if (getForcedEncodingSize() == 32) { 3094 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3095 return makeArrayRef(Variants); 3096 } 3097 3098 if (isForcedVOP3()) { 3099 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3100 return makeArrayRef(Variants); 3101 } 3102 3103 if (isForcedSDWA()) { 3104 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3105 AMDGPUAsmVariants::SDWA9}; 3106 return makeArrayRef(Variants); 3107 } 3108 3109 if (isForcedDPP()) { 3110 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3111 return makeArrayRef(Variants); 3112 } 3113 3114 return getAllVariants(); 3115 } 3116 3117 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3118 if (getForcedEncodingSize() == 32) 3119 return "e32"; 3120 3121 if (isForcedVOP3()) 3122 return "e64"; 3123 3124 if (isForcedSDWA()) 3125 return "sdwa"; 3126 3127 if (isForcedDPP()) 3128 return "dpp"; 3129 3130 return ""; 3131 } 3132 3133 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3134 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3135 const unsigned Num = Desc.getNumImplicitUses(); 3136 for (unsigned i = 0; i < Num; ++i) { 3137 unsigned Reg = Desc.ImplicitUses[i]; 3138 switch (Reg) { 3139 case AMDGPU::FLAT_SCR: 3140 case AMDGPU::VCC: 3141 case AMDGPU::VCC_LO: 3142 case AMDGPU::VCC_HI: 3143 case AMDGPU::M0: 3144 return Reg; 3145 default: 3146 break; 3147 } 3148 } 3149 return AMDGPU::NoRegister; 3150 } 3151 3152 // NB: This code is correct only when used to check constant 3153 // bus limitations because GFX7 support no f16 inline constants. 3154 // Note that there are no cases when a GFX7 opcode violates 3155 // constant bus limitations due to the use of an f16 constant. 
3156 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3157 unsigned OpIdx) const { 3158 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3159 3160 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3161 return false; 3162 } 3163 3164 const MCOperand &MO = Inst.getOperand(OpIdx); 3165 3166 int64_t Val = MO.getImm(); 3167 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3168 3169 switch (OpSize) { // expected operand size 3170 case 8: 3171 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3172 case 4: 3173 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3174 case 2: { 3175 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3176 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3177 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3178 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3179 return AMDGPU::isInlinableIntLiteral(Val); 3180 3181 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3182 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3183 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3184 return AMDGPU::isInlinableIntLiteralV216(Val); 3185 3186 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3187 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3188 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3189 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3190 3191 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3192 } 3193 default: 3194 llvm_unreachable("invalid operand size"); 3195 } 3196 } 3197 3198 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3199 if (!isGFX10Plus()) 3200 return 1; 3201 3202 switch (Opcode) { 3203 // 64-bit shift instructions can use only one scalar value input 3204 case AMDGPU::V_LSHLREV_B64_e64: 3205 case AMDGPU::V_LSHLREV_B64_gfx10: 3206 case AMDGPU::V_LSHRREV_B64_e64: 3207 case AMDGPU::V_LSHRREV_B64_gfx10: 3208 case AMDGPU::V_ASHRREV_I64_e64: 3209 case AMDGPU::V_ASHRREV_I64_gfx10: 3210 case AMDGPU::V_LSHL_B64_e64: 3211 case AMDGPU::V_LSHR_B64_e64: 3212 case AMDGPU::V_ASHR_I64_e64: 3213 return 1; 3214 default: 3215 return 2; 3216 } 3217 } 3218 3219 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3220 const MCOperand &MO = Inst.getOperand(OpIdx); 3221 if (MO.isImm()) { 3222 return !isInlineConstant(Inst, OpIdx); 3223 } else if (MO.isReg()) { 3224 auto Reg = MO.getReg(); 3225 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3226 auto PReg = mc2PseudoReg(Reg); 3227 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3228 } else { 3229 return true; 3230 } 3231 } 3232 3233 bool 3234 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3235 const OperandVector &Operands) { 3236 const unsigned Opcode = Inst.getOpcode(); 3237 const MCInstrDesc &Desc = MII.get(Opcode); 3238 unsigned LastSGPR = AMDGPU::NoRegister; 3239 unsigned ConstantBusUseCount = 0; 3240 unsigned NumLiterals = 0; 3241 unsigned LiteralSize; 3242 3243 if (Desc.TSFlags & 3244 (SIInstrFlags::VOPC | 3245 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3246 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3247 SIInstrFlags::SDWA)) { 3248 // Check special imm operands (used by madmk, etc) 3249 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3250 ++ConstantBusUseCount; 3251 } 3252 3253 SmallDenseSet<unsigned> SGPRsUsed; 3254 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3255 if (SGPRUsed != AMDGPU::NoRegister) { 3256 SGPRsUsed.insert(SGPRUsed); 3257 ++ConstantBusUseCount; 3258 } 3259 3260 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3261 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3262 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3263 3264 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3265 3266 for (int OpIdx : OpIndices) { 3267 if (OpIdx == -1) break; 3268 3269 const MCOperand &MO = Inst.getOperand(OpIdx); 3270 if (usesConstantBus(Inst, OpIdx)) { 3271 if (MO.isReg()) { 3272 LastSGPR = mc2PseudoReg(MO.getReg()); 3273 // Pairs of registers with a partial intersection like these 3274 // s0, s[0:1] 3275 // flat_scratch_lo, flat_scratch 3276 // flat_scratch_lo, flat_scratch_hi 3277 // are theoretically valid but they are disabled anyway. 3278 // Note that this code mimics SIInstrInfo::verifyInstruction 3279 if (!SGPRsUsed.count(LastSGPR)) { 3280 SGPRsUsed.insert(LastSGPR); 3281 ++ConstantBusUseCount; 3282 } 3283 } else { // Expression or a literal 3284 3285 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3286 continue; // special operand like VINTERP attr_chan 3287 3288 // An instruction may use only one literal. 3289 // This has been validated in a previous step. 3290 // See validateVOP3Literal. 3291 // This literal may be used as more than one operand. 3292 // If all these operands are of the same size, 3293 // this literal counts as one scalar value. 3294 // Otherwise it counts as 2 scalar values. 3295 // See "GFX10 Shader Programming", section 3.6.2.3. 3296 3297 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3298 if (Size < 4) Size = 4; 3299 3300 if (NumLiterals == 0) { 3301 NumLiterals = 1; 3302 LiteralSize = Size; 3303 } else if (LiteralSize != Size) { 3304 NumLiterals = 2; 3305 } 3306 } 3307 } 3308 } 3309 } 3310 ConstantBusUseCount += NumLiterals; 3311 3312 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3313 return true; 3314 3315 SMLoc LitLoc = getLitLoc(Operands); 3316 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3317 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3318 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3319 return false; 3320 } 3321 3322 bool 3323 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3324 const OperandVector &Operands) { 3325 const unsigned Opcode = Inst.getOpcode(); 3326 const MCInstrDesc &Desc = MII.get(Opcode); 3327 3328 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3329 if (DstIdx == -1 || 3330 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3331 return true; 3332 } 3333 3334 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3335 3336 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3337 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3338 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3339 3340 assert(DstIdx != -1); 3341 const MCOperand &Dst = Inst.getOperand(DstIdx); 3342 assert(Dst.isReg()); 3343 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3344 3345 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3346 3347 for (int SrcIdx : SrcIndices) { 3348 if (SrcIdx == -1) break; 3349 const MCOperand &Src = Inst.getOperand(SrcIdx); 3350 if (Src.isReg()) { 3351 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3352 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3353 Error(getRegLoc(SrcReg, Operands), 3354 "destination must be different than all sources"); 3355 return false; 3356 } 3357 } 3358 } 3359 3360 return true; 3361 } 3362 3363 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3364 3365 const unsigned Opc = Inst.getOpcode(); 3366 const MCInstrDesc &Desc = MII.get(Opc); 3367 3368 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3369 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3370 assert(ClampIdx != -1); 3371 return Inst.getOperand(ClampIdx).getImm() == 0; 3372 } 3373 3374 return true; 3375 } 3376 3377 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3378 3379 const unsigned Opc = Inst.getOpcode(); 3380 const MCInstrDesc &Desc = MII.get(Opc); 3381 3382 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3383 return true; 3384 3385 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3386 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3387 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3388 3389 assert(VDataIdx != -1); 3390 3391 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3392 return true; 3393 3394 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3395 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3396 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3397 if (DMask == 0) 3398 DMask = 1; 3399 3400 unsigned DataSize = 3401 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3402 if (hasPackedD16()) { 3403 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3404 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3405 DataSize = (DataSize + 1) / 2; 3406 } 3407 3408 return (VDataSize / 4) == DataSize + TFESize; 3409 } 3410 3411 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3412 const unsigned Opc = Inst.getOpcode(); 3413 const MCInstrDesc &Desc = MII.get(Opc); 3414 3415 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3416 return true; 3417 3418 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3419 3420 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3421 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3422 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3423 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3424 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3425 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3426 3427 assert(VAddr0Idx != -1); 3428 assert(SrsrcIdx != -1); 3429 assert(SrsrcIdx > VAddr0Idx); 3430 3431 if (DimIdx == -1) 3432 return true; // intersect_ray 3433 3434 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3435 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3436 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3437 unsigned VAddrSize = 3438 IsNSA ? SrsrcIdx - VAddr0Idx 3439 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3440 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3441 3442 unsigned AddrSize = 3443 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3444 3445 if (!IsNSA) { 3446 if (AddrSize > 8) 3447 AddrSize = 16; 3448 else if (AddrSize > 4) 3449 AddrSize = 8; 3450 } 3451 3452 return VAddrSize == AddrSize; 3453 } 3454 3455 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3456 3457 const unsigned Opc = Inst.getOpcode(); 3458 const MCInstrDesc &Desc = MII.get(Opc); 3459 3460 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3461 return true; 3462 if (!Desc.mayLoad() || !Desc.mayStore()) 3463 return true; // Not atomic 3464 3465 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3466 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3467 3468 // This is an incomplete check because image_atomic_cmpswap 3469 // may only use 0x3 and 0xf while other atomic operations 3470 // may use 0x1 and 0x3. However these limitations are 3471 // verified when we check that dmask matches dst size. 3472 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3473 } 3474 3475 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3476 3477 const unsigned Opc = Inst.getOpcode(); 3478 const MCInstrDesc &Desc = MII.get(Opc); 3479 3480 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3481 return true; 3482 3483 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3484 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3485 3486 // GATHER4 instructions use dmask in a different fashion compared to 3487 // other MIMG instructions. The only useful DMASK values are 3488 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3489 // (red,red,red,red) etc.) The ISA document doesn't mention 3490 // this. 
3491 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3492 } 3493 3494 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3499 return true; 3500 3501 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3502 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3503 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3504 3505 if (!BaseOpcode->MSAA) 3506 return true; 3507 3508 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3509 assert(DimIdx != -1); 3510 3511 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3512 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3513 3514 return DimInfo->MSAA; 3515 } 3516 3517 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3518 { 3519 switch (Opcode) { 3520 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3521 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3522 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3523 return true; 3524 default: 3525 return false; 3526 } 3527 } 3528 3529 // movrels* opcodes should only allow VGPRS as src0. 3530 // This is specified in .td description for vop1/vop3, 3531 // but sdwa is handled differently. See isSDWAOperand. 3532 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3533 const OperandVector &Operands) { 3534 3535 const unsigned Opc = Inst.getOpcode(); 3536 const MCInstrDesc &Desc = MII.get(Opc); 3537 3538 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3539 return true; 3540 3541 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3542 assert(Src0Idx != -1); 3543 3544 SMLoc ErrLoc; 3545 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3546 if (Src0.isReg()) { 3547 auto Reg = mc2PseudoReg(Src0.getReg()); 3548 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3549 if (!isSGPR(Reg, TRI)) 3550 return true; 3551 ErrLoc = getRegLoc(Reg, Operands); 3552 } else { 3553 ErrLoc = getConstLoc(Operands); 3554 } 3555 3556 Error(ErrLoc, "source operand must be a VGPR"); 3557 return false; 3558 } 3559 3560 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3561 const OperandVector &Operands) { 3562 3563 const unsigned Opc = Inst.getOpcode(); 3564 3565 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3566 return true; 3567 3568 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3569 assert(Src0Idx != -1); 3570 3571 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3572 if (!Src0.isReg()) 3573 return true; 3574 3575 auto Reg = mc2PseudoReg(Src0.getReg()); 3576 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3577 if (isSGPR(Reg, TRI)) { 3578 Error(getRegLoc(Reg, Operands), 3579 "source operand must be either a VGPR or an inline constant"); 3580 return false; 3581 } 3582 3583 return true; 3584 } 3585 3586 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3587 switch (Inst.getOpcode()) { 3588 default: 3589 return true; 3590 case V_DIV_SCALE_F32_gfx6_gfx7: 3591 case V_DIV_SCALE_F32_vi: 3592 case V_DIV_SCALE_F32_gfx10: 3593 case V_DIV_SCALE_F64_gfx6_gfx7: 3594 case V_DIV_SCALE_F64_vi: 3595 case V_DIV_SCALE_F64_gfx10: 3596 break; 3597 } 3598 3599 // TODO: Check that src0 = src1 or src2. 
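  // v_div_scale uses the VOP3B encoding, which has no abs modifier bits
  // (that field holds sdst), so sources with an 'abs' modifier are
  // rejected below.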
3600 3601 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3602 AMDGPU::OpName::src1_modifiers, 3603 AMDGPU::OpName::src2_modifiers}) { 3604 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3605 .getImm() & 3606 SISrcMods::ABS) { 3607 return false; 3608 } 3609 } 3610 3611 return true; 3612 } 3613 3614 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3615 3616 const unsigned Opc = Inst.getOpcode(); 3617 const MCInstrDesc &Desc = MII.get(Opc); 3618 3619 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3620 return true; 3621 3622 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3623 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3624 if (isCI() || isSI()) 3625 return false; 3626 } 3627 3628 return true; 3629 } 3630 3631 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3632 const unsigned Opc = Inst.getOpcode(); 3633 const MCInstrDesc &Desc = MII.get(Opc); 3634 3635 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3636 return true; 3637 3638 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3639 if (DimIdx < 0) 3640 return true; 3641 3642 long Imm = Inst.getOperand(DimIdx).getImm(); 3643 if (Imm < 0 || Imm >= 8) 3644 return false; 3645 3646 return true; 3647 } 3648 3649 static bool IsRevOpcode(const unsigned Opcode) 3650 { 3651 switch (Opcode) { 3652 case AMDGPU::V_SUBREV_F32_e32: 3653 case AMDGPU::V_SUBREV_F32_e64: 3654 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3655 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3656 case AMDGPU::V_SUBREV_F32_e32_vi: 3657 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3658 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3659 case AMDGPU::V_SUBREV_F32_e64_vi: 3660 3661 case AMDGPU::V_SUBREV_CO_U32_e32: 3662 case AMDGPU::V_SUBREV_CO_U32_e64: 3663 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3664 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3665 3666 case AMDGPU::V_SUBBREV_U32_e32: 3667 case AMDGPU::V_SUBBREV_U32_e64: 3668 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3669 case AMDGPU::V_SUBBREV_U32_e32_vi: 3670 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3671 case AMDGPU::V_SUBBREV_U32_e64_vi: 3672 3673 case AMDGPU::V_SUBREV_U32_e32: 3674 case AMDGPU::V_SUBREV_U32_e64: 3675 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3676 case AMDGPU::V_SUBREV_U32_e32_vi: 3677 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3678 case AMDGPU::V_SUBREV_U32_e64_vi: 3679 3680 case AMDGPU::V_SUBREV_F16_e32: 3681 case AMDGPU::V_SUBREV_F16_e64: 3682 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3683 case AMDGPU::V_SUBREV_F16_e32_vi: 3684 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3685 case AMDGPU::V_SUBREV_F16_e64_vi: 3686 3687 case AMDGPU::V_SUBREV_U16_e32: 3688 case AMDGPU::V_SUBREV_U16_e64: 3689 case AMDGPU::V_SUBREV_U16_e32_vi: 3690 case AMDGPU::V_SUBREV_U16_e64_vi: 3691 3692 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3693 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3694 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3695 3696 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3697 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3698 3699 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3700 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3701 3702 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3703 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3704 3705 case AMDGPU::V_LSHRREV_B32_e32: 3706 case AMDGPU::V_LSHRREV_B32_e64: 3707 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3708 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3709 case AMDGPU::V_LSHRREV_B32_e32_vi: 3710 case AMDGPU::V_LSHRREV_B32_e64_vi: 3711 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3712 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3713 3714 case AMDGPU::V_ASHRREV_I32_e32: 3715 case
AMDGPU::V_ASHRREV_I32_e64: 3716 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3717 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3718 case AMDGPU::V_ASHRREV_I32_e32_vi: 3719 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3720 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3721 case AMDGPU::V_ASHRREV_I32_e64_vi: 3722 3723 case AMDGPU::V_LSHLREV_B32_e32: 3724 case AMDGPU::V_LSHLREV_B32_e64: 3725 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3726 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3727 case AMDGPU::V_LSHLREV_B32_e32_vi: 3728 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3729 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3730 case AMDGPU::V_LSHLREV_B32_e64_vi: 3731 3732 case AMDGPU::V_LSHLREV_B16_e32: 3733 case AMDGPU::V_LSHLREV_B16_e64: 3734 case AMDGPU::V_LSHLREV_B16_e32_vi: 3735 case AMDGPU::V_LSHLREV_B16_e64_vi: 3736 case AMDGPU::V_LSHLREV_B16_gfx10: 3737 3738 case AMDGPU::V_LSHRREV_B16_e32: 3739 case AMDGPU::V_LSHRREV_B16_e64: 3740 case AMDGPU::V_LSHRREV_B16_e32_vi: 3741 case AMDGPU::V_LSHRREV_B16_e64_vi: 3742 case AMDGPU::V_LSHRREV_B16_gfx10: 3743 3744 case AMDGPU::V_ASHRREV_I16_e32: 3745 case AMDGPU::V_ASHRREV_I16_e64: 3746 case AMDGPU::V_ASHRREV_I16_e32_vi: 3747 case AMDGPU::V_ASHRREV_I16_e64_vi: 3748 case AMDGPU::V_ASHRREV_I16_gfx10: 3749 3750 case AMDGPU::V_LSHLREV_B64_e64: 3751 case AMDGPU::V_LSHLREV_B64_gfx10: 3752 case AMDGPU::V_LSHLREV_B64_vi: 3753 3754 case AMDGPU::V_LSHRREV_B64_e64: 3755 case AMDGPU::V_LSHRREV_B64_gfx10: 3756 case AMDGPU::V_LSHRREV_B64_vi: 3757 3758 case AMDGPU::V_ASHRREV_I64_e64: 3759 case AMDGPU::V_ASHRREV_I64_gfx10: 3760 case AMDGPU::V_ASHRREV_I64_vi: 3761 3762 case AMDGPU::V_PK_LSHLREV_B16: 3763 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3764 case AMDGPU::V_PK_LSHLREV_B16_vi: 3765 3766 case AMDGPU::V_PK_LSHRREV_B16: 3767 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3768 case AMDGPU::V_PK_LSHRREV_B16_vi: 3769 case AMDGPU::V_PK_ASHRREV_I16: 3770 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3771 case AMDGPU::V_PK_ASHRREV_I16_vi: 3772 return true; 3773 default: 3774 return false; 3775 } 3776 } 3777 3778 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3779 3780 using namespace SIInstrFlags; 3781 const unsigned Opcode = Inst.getOpcode(); 3782 const MCInstrDesc &Desc = MII.get(Opcode); 3783 3784 // lds_direct register is defined so that it can be used 3785 // with 9-bit operands only. Ignore encodings which do not accept these. 
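// Illustrative examples (operand choices are arbitrary):
//   v_mov_b32 v0, lds_direct          // accepted: lds_direct is src0
//   v_add_f32 v0, v1, lds_direct      // rejected: lds_direct may only be src0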
3786 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3787 if ((Desc.TSFlags & Enc) == 0) 3788 return None; 3789 3790 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3791 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3792 if (SrcIdx == -1) 3793 break; 3794 const auto &Src = Inst.getOperand(SrcIdx); 3795 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3796 3797 if (isGFX90A()) 3798 return StringRef("lds_direct is not supported on this GPU"); 3799 3800 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3801 return StringRef("lds_direct cannot be used with this instruction"); 3802 3803 if (SrcName != OpName::src0) 3804 return StringRef("lds_direct may be used as src0 only"); 3805 } 3806 } 3807 3808 return None; 3809 } 3810 3811 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3812 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3813 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3814 if (Op.isFlatOffset()) 3815 return Op.getStartLoc(); 3816 } 3817 return getLoc(); 3818 } 3819 3820 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3821 const OperandVector &Operands) { 3822 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3823 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3824 return true; 3825 3826 auto Opcode = Inst.getOpcode(); 3827 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3828 assert(OpNum != -1); 3829 3830 const auto &Op = Inst.getOperand(OpNum); 3831 if (!hasFlatOffsets() && Op.getImm() != 0) { 3832 Error(getFlatOffsetLoc(Operands), 3833 "flat offset modifier is not supported on this GPU"); 3834 return false; 3835 } 3836 3837 // For FLAT segment the offset must be positive; 3838 // MSB is ignored and forced to zero. 3839 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3840 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3841 if (!isIntN(OffsetSize, Op.getImm())) { 3842 Error(getFlatOffsetLoc(Operands), 3843 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3844 return false; 3845 } 3846 } else { 3847 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3848 if (!isUIntN(OffsetSize, Op.getImm())) { 3849 Error(getFlatOffsetLoc(Operands), 3850 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3851 return false; 3852 } 3853 } 3854 3855 return true; 3856 } 3857 3858 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3859 // Start with second operand because SMEM Offset cannot be dst or src0. 
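// e.g. in "s_load_dword s5, s[2:3], 0x10" (illustrative operands) the offset
// follows the dst and base operands, so the search can safely start at index 2.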
3860 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3861 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3862 if (Op.isSMEMOffset()) 3863 return Op.getStartLoc(); 3864 } 3865 return getLoc(); 3866 } 3867 3868 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3869 const OperandVector &Operands) { 3870 if (isCI() || isSI()) 3871 return true; 3872 3873 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3874 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3875 return true; 3876 3877 auto Opcode = Inst.getOpcode(); 3878 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3879 if (OpNum == -1) 3880 return true; 3881 3882 const auto &Op = Inst.getOperand(OpNum); 3883 if (!Op.isImm()) 3884 return true; 3885 3886 uint64_t Offset = Op.getImm(); 3887 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3888 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3889 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3890 return true; 3891 3892 Error(getSMEMOffsetLoc(Operands), 3893 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3894 "expected a 21-bit signed offset"); 3895 3896 return false; 3897 } 3898 3899 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3900 unsigned Opcode = Inst.getOpcode(); 3901 const MCInstrDesc &Desc = MII.get(Opcode); 3902 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3903 return true; 3904 3905 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3906 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3907 3908 const int OpIndices[] = { Src0Idx, Src1Idx }; 3909 3910 unsigned NumExprs = 0; 3911 unsigned NumLiterals = 0; 3912 uint32_t LiteralValue; 3913 3914 for (int OpIdx : OpIndices) { 3915 if (OpIdx == -1) break; 3916 3917 const MCOperand &MO = Inst.getOperand(OpIdx); 3918 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3919 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3920 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3921 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3922 if (NumLiterals == 0 || LiteralValue != Value) { 3923 LiteralValue = Value; 3924 ++NumLiterals; 3925 } 3926 } else if (MO.isExpr()) { 3927 ++NumExprs; 3928 } 3929 } 3930 } 3931 3932 return NumLiterals + NumExprs <= 1; 3933 } 3934 3935 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3936 const unsigned Opc = Inst.getOpcode(); 3937 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3938 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3939 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3940 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3941 3942 if (OpSel & ~3) 3943 return false; 3944 } 3945 return true; 3946 } 3947 3948 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3949 const OperandVector &Operands) { 3950 const unsigned Opc = Inst.getOpcode(); 3951 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3952 if (DppCtrlIdx < 0) 3953 return true; 3954 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3955 3956 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3957 // DPP64 is supported for row_newbcast only. 
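// That is, if src0 is a 64-bit register pair (it has a sub1 subregister),
// any dpp_ctrl value other than a row_newbcast control is rejected below.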
3958 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3959 if (Src0Idx >= 0 && 3960 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3961 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3962 Error(S, "64 bit dpp only supports row_newbcast"); 3963 return false; 3964 } 3965 } 3966 3967 return true; 3968 } 3969 3970 // Check if VCC register matches wavefront size 3971 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3972 auto FB = getFeatureBits(); 3973 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3974 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3975 } 3976 3977 // VOP3 literal is only allowed in GFX10+ and only one can be used 3978 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3979 const OperandVector &Operands) { 3980 unsigned Opcode = Inst.getOpcode(); 3981 const MCInstrDesc &Desc = MII.get(Opcode); 3982 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3983 return true; 3984 3985 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3986 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3987 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3988 3989 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3990 3991 unsigned NumExprs = 0; 3992 unsigned NumLiterals = 0; 3993 uint32_t LiteralValue; 3994 3995 for (int OpIdx : OpIndices) { 3996 if (OpIdx == -1) break; 3997 3998 const MCOperand &MO = Inst.getOperand(OpIdx); 3999 if (!MO.isImm() && !MO.isExpr()) 4000 continue; 4001 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4002 continue; 4003 4004 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4005 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4006 Error(getConstLoc(Operands), 4007 "inline constants are not allowed for this operand"); 4008 return false; 4009 } 4010 4011 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4012 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4013 if (NumLiterals == 0 || LiteralValue != Value) { 4014 LiteralValue = Value; 4015 ++NumLiterals; 4016 } 4017 } else if (MO.isExpr()) { 4018 ++NumExprs; 4019 } 4020 } 4021 NumLiterals += NumExprs; 4022 4023 if (!NumLiterals) 4024 return true; 4025 4026 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4027 Error(getLitLoc(Operands), "literal operands are not supported"); 4028 return false; 4029 } 4030 4031 if (NumLiterals > 1) { 4032 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4033 return false; 4034 } 4035 4036 return true; 4037 } 4038 4039 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4040 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4041 const MCRegisterInfo *MRI) { 4042 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4043 if (OpIdx < 0) 4044 return -1; 4045 4046 const MCOperand &Op = Inst.getOperand(OpIdx); 4047 if (!Op.isReg()) 4048 return -1; 4049 4050 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4051 auto Reg = Sub ? Sub : Op.getReg(); 4052 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4053 return AGRP32.contains(Reg) ? 
1 : 0; 4054 } 4055 4056 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4057 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4058 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4059 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4060 SIInstrFlags::DS)) == 0) 4061 return true; 4062 4063 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4064 : AMDGPU::OpName::vdata; 4065 4066 const MCRegisterInfo *MRI = getMRI(); 4067 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4068 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4069 4070 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4071 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4072 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4073 return false; 4074 } 4075 4076 auto FB = getFeatureBits(); 4077 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4078 if (DataAreg < 0 || DstAreg < 0) 4079 return true; 4080 return DstAreg == DataAreg; 4081 } 4082 4083 return DstAreg < 1 && DataAreg < 1; 4084 } 4085 4086 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4087 auto FB = getFeatureBits(); 4088 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4089 return true; 4090 4091 const MCRegisterInfo *MRI = getMRI(); 4092 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4093 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4094 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4095 const MCOperand &Op = Inst.getOperand(I); 4096 if (!Op.isReg()) 4097 continue; 4098 4099 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4100 if (!Sub) 4101 continue; 4102 4103 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4104 return false; 4105 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4106 return false; 4107 } 4108 4109 return true; 4110 } 4111 4112 // gfx90a has an undocumented limitation: 4113 // DS_GWS opcodes must use even aligned registers. 4114 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4115 const OperandVector &Operands) { 4116 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4117 return true; 4118 4119 int Opc = Inst.getOpcode(); 4120 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4121 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4122 return true; 4123 4124 const MCRegisterInfo *MRI = getMRI(); 4125 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4126 int Data0Pos = 4127 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4128 assert(Data0Pos != -1); 4129 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4130 auto RegIdx = Reg - (VGRP32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4131 if (RegIdx & 1) { 4132 SMLoc RegLoc = getRegLoc(Reg, Operands); 4133 Error(RegLoc, "vgpr must be even aligned"); 4134 return false; 4135 } 4136 4137 return true; 4138 } 4139 4140 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4141 const OperandVector &Operands, 4142 const SMLoc &IDLoc) { 4143 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4144 AMDGPU::OpName::cpol); 4145 if (CPolPos == -1) 4146 return true; 4147 4148 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4149 4150 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4151 if ((TSFlags & (SIInstrFlags::SMRD)) && 4152 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4153 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4154 return false; 4155 } 4156 4157 if (isGFX90A() && (CPol & CPol::SCC)) { 4158 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4159 StringRef CStr(S.getPointer()); 4160 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4161 Error(S, "scc is not supported on this GPU"); 4162 return false; 4163 } 4164 4165 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4166 return true; 4167 4168 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4169 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4170 Error(IDLoc, "instruction must use glc"); 4171 return false; 4172 } 4173 } else { 4174 if (CPol & CPol::GLC) { 4175 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4176 StringRef CStr(S.getPointer()); 4177 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4178 Error(S, "instruction must not use glc"); 4179 return false; 4180 } 4181 } 4182 4183 return true; 4184 } 4185 4186 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4187 const SMLoc &IDLoc, 4188 const OperandVector &Operands) { 4189 if (auto ErrMsg = validateLdsDirect(Inst)) { 4190 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4191 return false; 4192 } 4193 if (!validateSOPLiteral(Inst)) { 4194 Error(getLitLoc(Operands), 4195 "only one literal operand is allowed"); 4196 return false; 4197 } 4198 if (!validateVOP3Literal(Inst, Operands)) { 4199 return false; 4200 } 4201 if (!validateConstantBusLimitations(Inst, Operands)) { 4202 return false; 4203 } 4204 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4205 return false; 4206 } 4207 if (!validateIntClampSupported(Inst)) { 4208 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4209 "integer clamping is not supported on this GPU"); 4210 return false; 4211 } 4212 if (!validateOpSel(Inst)) { 4213 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4214 "invalid op_sel operand"); 4215 return false; 4216 } 4217 if (!validateDPP(Inst, Operands)) { 4218 return false; 4219 } 4220 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
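// (e.g. "buffer_load_format_d16_x" encodes d16 in the mnemonic itself, whereas
// image instructions carry a separate d16 modifier that is validated here.)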
4221 if (!validateMIMGD16(Inst)) { 4222 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4223 "d16 modifier is not supported on this GPU"); 4224 return false; 4225 } 4226 if (!validateMIMGDim(Inst)) { 4227 Error(IDLoc, "dim modifier is required on this GPU"); 4228 return false; 4229 } 4230 if (!validateMIMGMSAA(Inst)) { 4231 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4232 "invalid dim; must be MSAA type"); 4233 return false; 4234 } 4235 if (!validateMIMGDataSize(Inst)) { 4236 Error(IDLoc, 4237 "image data size does not match dmask and tfe"); 4238 return false; 4239 } 4240 if (!validateMIMGAddrSize(Inst)) { 4241 Error(IDLoc, 4242 "image address size does not match dim and a16"); 4243 return false; 4244 } 4245 if (!validateMIMGAtomicDMask(Inst)) { 4246 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4247 "invalid atomic image dmask"); 4248 return false; 4249 } 4250 if (!validateMIMGGatherDMask(Inst)) { 4251 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4252 "invalid image_gather dmask: only one bit must be set"); 4253 return false; 4254 } 4255 if (!validateMovrels(Inst, Operands)) { 4256 return false; 4257 } 4258 if (!validateFlatOffset(Inst, Operands)) { 4259 return false; 4260 } 4261 if (!validateSMEMOffset(Inst, Operands)) { 4262 return false; 4263 } 4264 if (!validateMAIAccWrite(Inst, Operands)) { 4265 return false; 4266 } 4267 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4268 return false; 4269 } 4270 4271 if (!validateAGPRLdSt(Inst)) { 4272 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4273 ? "invalid register class: data and dst should be all VGPR or AGPR" 4274 : "invalid register class: agpr loads and stores not supported on this GPU" 4275 ); 4276 return false; 4277 } 4278 if (!validateVGPRAlign(Inst)) { 4279 Error(IDLoc, 4280 "invalid register class: vgpr tuples must be 64 bit aligned"); 4281 return false; 4282 } 4283 if (!validateGWS(Inst, Operands)) { 4284 return false; 4285 } 4286 4287 if (!validateDivScale(Inst)) { 4288 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4289 return false; 4290 } 4291 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4292 return false; 4293 } 4294 4295 return true; 4296 } 4297 4298 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4299 const FeatureBitset &FBS, 4300 unsigned VariantID = 0); 4301 4302 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4303 const FeatureBitset &AvailableFeatures, 4304 unsigned VariantID); 4305 4306 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4307 const FeatureBitset &FBS) { 4308 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4309 } 4310 4311 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4312 const FeatureBitset &FBS, 4313 ArrayRef<unsigned> Variants) { 4314 for (auto Variant : Variants) { 4315 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4316 return true; 4317 } 4318 4319 return false; 4320 } 4321 4322 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4323 const SMLoc &IDLoc) { 4324 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4325 4326 // Check if requested instruction variant is supported. 4327 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4328 return false; 4329 4330 // This instruction is not supported. 4331 // Clear any other pending errors because they are no longer relevant. 4332 getParser().clearPendingErrors(); 4333 4334 // Requested instruction variant is not supported. 4335 // Check if any other variants are supported. 
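// Illustrative scenario: the mnemonic was written with an _sdwa or _dpp
// suffix, but only other encodings of it exist on this subtarget.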
4336 StringRef VariantName = getMatchedVariantName(); 4337 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4338 return Error(IDLoc, 4339 Twine(VariantName, 4340 " variant of this instruction is not supported")); 4341 } 4342 4343 // Finally check if this instruction is supported on any other GPU. 4344 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4345 return Error(IDLoc, "instruction not supported on this GPU"); 4346 } 4347 4348 // Instruction not supported on any GPU. Probably a typo. 4349 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4350 return Error(IDLoc, "invalid instruction" + Suggestion); 4351 } 4352 4353 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4354 OperandVector &Operands, 4355 MCStreamer &Out, 4356 uint64_t &ErrorInfo, 4357 bool MatchingInlineAsm) { 4358 MCInst Inst; 4359 unsigned Result = Match_Success; 4360 for (auto Variant : getMatchedVariants()) { 4361 uint64_t EI; 4362 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4363 Variant); 4364 // We order match statuses from least to most specific. We use most specific 4365 // status as resulting 4366 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4367 if ((R == Match_Success) || 4368 (R == Match_PreferE32) || 4369 (R == Match_MissingFeature && Result != Match_PreferE32) || 4370 (R == Match_InvalidOperand && Result != Match_MissingFeature 4371 && Result != Match_PreferE32) || 4372 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4373 && Result != Match_MissingFeature 4374 && Result != Match_PreferE32)) { 4375 Result = R; 4376 ErrorInfo = EI; 4377 } 4378 if (R == Match_Success) 4379 break; 4380 } 4381 4382 if (Result == Match_Success) { 4383 if (!validateInstruction(Inst, IDLoc, Operands)) { 4384 return true; 4385 } 4386 Inst.setLoc(IDLoc); 4387 Out.emitInstruction(Inst, getSTI()); 4388 return false; 4389 } 4390 4391 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4392 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4393 return true; 4394 } 4395 4396 switch (Result) { 4397 default: break; 4398 case Match_MissingFeature: 4399 // It has been verified that the specified instruction 4400 // mnemonic is valid. A match was found but it requires 4401 // features which are not supported on this GPU. 
4402 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4403 4404 case Match_InvalidOperand: { 4405 SMLoc ErrorLoc = IDLoc; 4406 if (ErrorInfo != ~0ULL) { 4407 if (ErrorInfo >= Operands.size()) { 4408 return Error(IDLoc, "too few operands for instruction"); 4409 } 4410 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4411 if (ErrorLoc == SMLoc()) 4412 ErrorLoc = IDLoc; 4413 } 4414 return Error(ErrorLoc, "invalid operand for instruction"); 4415 } 4416 4417 case Match_PreferE32: 4418 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4419 "should be encoded as e32"); 4420 case Match_MnemonicFail: 4421 llvm_unreachable("Invalid instructions should have been handled already"); 4422 } 4423 llvm_unreachable("Implement any new match types added!"); 4424 } 4425 4426 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4427 int64_t Tmp = -1; 4428 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4429 return true; 4430 } 4431 if (getParser().parseAbsoluteExpression(Tmp)) { 4432 return true; 4433 } 4434 Ret = static_cast<uint32_t>(Tmp); 4435 return false; 4436 } 4437 4438 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4439 uint32_t &Minor) { 4440 if (ParseAsAbsoluteExpression(Major)) 4441 return TokError("invalid major version"); 4442 4443 if (!trySkipToken(AsmToken::Comma)) 4444 return TokError("minor version number required, comma expected"); 4445 4446 if (ParseAsAbsoluteExpression(Minor)) 4447 return TokError("invalid minor version"); 4448 4449 return false; 4450 } 4451 4452 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4453 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4454 return TokError("directive only supported for amdgcn architecture"); 4455 4456 std::string TargetIDDirective; 4457 SMLoc TargetStart = getTok().getLoc(); 4458 if (getParser().parseEscapedString(TargetIDDirective)) 4459 return true; 4460 4461 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4462 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4463 return getParser().Error(TargetRange.Start, 4464 (Twine(".amdgcn_target directive's target id ") + 4465 Twine(TargetIDDirective) + 4466 Twine(" does not match the specified target id ") + 4467 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4468 4469 return false; 4470 } 4471 4472 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4473 return Error(Range.Start, "value out of range", Range); 4474 } 4475 4476 bool AMDGPUAsmParser::calculateGPRBlocks( 4477 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4478 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4479 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4480 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4481 // TODO(scott.linder): These calculations are duplicated from 4482 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
4483 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4484 4485 unsigned NumVGPRs = NextFreeVGPR; 4486 unsigned NumSGPRs = NextFreeSGPR; 4487 4488 if (Version.Major >= 10) 4489 NumSGPRs = 0; 4490 else { 4491 unsigned MaxAddressableNumSGPRs = 4492 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4493 4494 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4495 NumSGPRs > MaxAddressableNumSGPRs) 4496 return OutOfRangeError(SGPRRange); 4497 4498 NumSGPRs += 4499 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4500 4501 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4502 NumSGPRs > MaxAddressableNumSGPRs) 4503 return OutOfRangeError(SGPRRange); 4504 4505 if (Features.test(FeatureSGPRInitBug)) 4506 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4507 } 4508 4509 VGPRBlocks = 4510 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4511 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4512 4513 return false; 4514 } 4515 4516 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4517 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4518 return TokError("directive only supported for amdgcn architecture"); 4519 4520 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4521 return TokError("directive only supported for amdhsa OS"); 4522 4523 StringRef KernelName; 4524 if (getParser().parseIdentifier(KernelName)) 4525 return true; 4526 4527 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4528 4529 StringSet<> Seen; 4530 4531 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4532 4533 SMRange VGPRRange; 4534 uint64_t NextFreeVGPR = 0; 4535 uint64_t AccumOffset = 0; 4536 SMRange SGPRRange; 4537 uint64_t NextFreeSGPR = 0; 4538 unsigned UserSGPRCount = 0; 4539 bool ReserveVCC = true; 4540 bool ReserveFlatScr = true; 4541 Optional<bool> EnableWavefrontSize32; 4542 4543 while (true) { 4544 while (trySkipToken(AsmToken::EndOfStatement)); 4545 4546 StringRef ID; 4547 SMRange IDRange = getTok().getLocRange(); 4548 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4549 return true; 4550 4551 if (ID == ".end_amdhsa_kernel") 4552 break; 4553 4554 if (Seen.find(ID) != Seen.end()) 4555 return TokError(".amdhsa_ directives cannot be repeated"); 4556 Seen.insert(ID); 4557 4558 SMLoc ValStart = getLoc(); 4559 int64_t IVal; 4560 if (getParser().parseAbsoluteExpression(IVal)) 4561 return true; 4562 SMLoc ValEnd = getLoc(); 4563 SMRange ValRange = SMRange(ValStart, ValEnd); 4564 4565 if (IVal < 0) 4566 return OutOfRangeError(ValRange); 4567 4568 uint64_t Val = IVal; 4569 4570 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4571 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4572 return OutOfRangeError(RANGE); \ 4573 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4574 4575 if (ID == ".amdhsa_group_segment_fixed_size") { 4576 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4577 return OutOfRangeError(ValRange); 4578 KD.group_segment_fixed_size = Val; 4579 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4580 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4581 return OutOfRangeError(ValRange); 4582 KD.private_segment_fixed_size = Val; 4583 } else if (ID == ".amdhsa_kernarg_size") { 4584 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4585 return OutOfRangeError(ValRange); 4586 KD.kernarg_size = Val; 4587 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4588 if (hasArchitectedFlatScratch()) 4589 return 
Error(IDRange.Start, 4590 "directive is not supported with architected flat scratch", 4591 IDRange); 4592 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4593 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4594 Val, ValRange); 4595 if (Val) 4596 UserSGPRCount += 4; 4597 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4598 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4599 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4600 ValRange); 4601 if (Val) 4602 UserSGPRCount += 2; 4603 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4604 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4605 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4606 ValRange); 4607 if (Val) 4608 UserSGPRCount += 2; 4609 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4610 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4611 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4612 Val, ValRange); 4613 if (Val) 4614 UserSGPRCount += 2; 4615 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4616 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4617 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4618 ValRange); 4619 if (Val) 4620 UserSGPRCount += 2; 4621 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4622 if (hasArchitectedFlatScratch()) 4623 return Error(IDRange.Start, 4624 "directive is not supported with architected flat scratch", 4625 IDRange); 4626 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4627 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4628 ValRange); 4629 if (Val) 4630 UserSGPRCount += 2; 4631 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4632 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4633 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4634 Val, ValRange); 4635 if (Val) 4636 UserSGPRCount += 1; 4637 } else if (ID == ".amdhsa_wavefront_size32") { 4638 if (IVersion.Major < 10) 4639 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4640 EnableWavefrontSize32 = Val; 4641 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4642 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4643 Val, ValRange); 4644 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4645 if (hasArchitectedFlatScratch()) 4646 return Error(IDRange.Start, 4647 "directive is not supported with architected flat scratch", 4648 IDRange); 4649 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4650 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4651 } else if (ID == ".amdhsa_enable_private_segment") { 4652 if (!hasArchitectedFlatScratch()) 4653 return Error( 4654 IDRange.Start, 4655 "directive is not supported without architected flat scratch", 4656 IDRange); 4657 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4658 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4659 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4660 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4661 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4662 ValRange); 4663 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4664 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4665 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4666 ValRange); 4667 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4668 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4669 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4670 ValRange); 4671 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4672 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4673 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4674 ValRange); 4675 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4676 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4677 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4678 ValRange); 4679 } else if (ID == ".amdhsa_next_free_vgpr") { 4680 VGPRRange = ValRange; 4681 NextFreeVGPR = Val; 4682 } else if (ID == ".amdhsa_next_free_sgpr") { 4683 SGPRRange = ValRange; 4684 NextFreeSGPR = Val; 4685 } else if (ID == ".amdhsa_accum_offset") { 4686 if (!isGFX90A()) 4687 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4688 AccumOffset = Val; 4689 } else if (ID == ".amdhsa_reserve_vcc") { 4690 if (!isUInt<1>(Val)) 4691 return OutOfRangeError(ValRange); 4692 ReserveVCC = Val; 4693 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4694 if (IVersion.Major < 7) 4695 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4696 if (hasArchitectedFlatScratch()) 4697 return Error(IDRange.Start, 4698 "directive is not supported with architected flat scratch", 4699 IDRange); 4700 if (!isUInt<1>(Val)) 4701 return OutOfRangeError(ValRange); 4702 ReserveFlatScr = Val; 4703 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4704 if (IVersion.Major < 8) 4705 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4706 if (!isUInt<1>(Val)) 4707 return OutOfRangeError(ValRange); 4708 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4709 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4710 IDRange); 4711 } else if (ID == ".amdhsa_float_round_mode_32") { 4712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4713 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4714 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4715 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4716 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4717 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4719 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4720 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4722 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4723 ValRange); 4724 } else if (ID == ".amdhsa_dx10_clamp") { 4725 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4726 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4727 } else if (ID == ".amdhsa_ieee_mode") { 4728 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4729 Val, ValRange); 4730 } else if (ID == ".amdhsa_fp16_overflow") { 4731 if (IVersion.Major < 9) 4732 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4733 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4734 ValRange); 4735 } else if (ID == ".amdhsa_tg_split") { 4736 if (!isGFX90A()) 4737 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4738 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4739 ValRange); 4740 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4741 if (IVersion.Major < 10) 4742 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4744 ValRange); 4745 } else if (ID == ".amdhsa_memory_ordered") { 4746 if (IVersion.Major < 10) 4747 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4748 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4749 ValRange); 4750 } else if (ID == ".amdhsa_forward_progress") { 4751 if (IVersion.Major < 10) 4752 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4754 ValRange); 4755 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4756 PARSE_BITS_ENTRY( 4757 KD.compute_pgm_rsrc2, 4758 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4759 ValRange); 4760 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4761 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4762 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4763 Val, ValRange); 4764 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4765 PARSE_BITS_ENTRY( 4766 KD.compute_pgm_rsrc2, 4767 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4768 ValRange); 4769 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4771 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4772 Val, ValRange); 4773 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4775 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4776 Val, ValRange); 4777 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4779 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4780 Val, ValRange); 4781 } else if (ID == ".amdhsa_exception_int_div_zero") { 4782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4783 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4784 Val, ValRange); 4785 } else { 4786 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4787 } 4788 4789 #undef PARSE_BITS_ENTRY 4790 } 4791 4792 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4793 return TokError(".amdhsa_next_free_vgpr directive is required"); 4794 4795 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4796 return TokError(".amdhsa_next_free_sgpr directive is required"); 4797 4798 unsigned VGPRBlocks; 4799 unsigned SGPRBlocks; 4800 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4801 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4802 EnableWavefrontSize32, NextFreeVGPR, 4803 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4804 SGPRBlocks)) 4805 return true; 4806 4807 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4808 VGPRBlocks)) 4809 return OutOfRangeError(VGPRRange); 4810 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4811 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4812 4813 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4814 SGPRBlocks)) 4815 return OutOfRangeError(SGPRRange); 4816 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4817 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4818 SGPRBlocks); 4819 4820 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4821 return TokError("too many user SGPRs enabled"); 4822 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4823 UserSGPRCount); 4824 4825 if (isGFX90A()) { 4826 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4827 return TokError(".amdhsa_accum_offset directive is required"); 4828 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4829 return TokError("accum_offset should be in range [4..256] in " 4830 "increments of 4"); 4831 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4832 return TokError("accum_offset exceeds total VGPR allocation"); 4833 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4834 (AccumOffset / 4 - 1)); 4835 } 4836 4837 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4838 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, 
ReserveVCC, 4839 ReserveFlatScr); 4840 return false; 4841 } 4842 4843 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4844 uint32_t Major; 4845 uint32_t Minor; 4846 4847 if (ParseDirectiveMajorMinor(Major, Minor)) 4848 return true; 4849 4850 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4851 return false; 4852 } 4853 4854 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4855 uint32_t Major; 4856 uint32_t Minor; 4857 uint32_t Stepping; 4858 StringRef VendorName; 4859 StringRef ArchName; 4860 4861 // If this directive has no arguments, then use the ISA version for the 4862 // targeted GPU. 4863 if (isToken(AsmToken::EndOfStatement)) { 4864 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4865 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4866 ISA.Stepping, 4867 "AMD", "AMDGPU"); 4868 return false; 4869 } 4870 4871 if (ParseDirectiveMajorMinor(Major, Minor)) 4872 return true; 4873 4874 if (!trySkipToken(AsmToken::Comma)) 4875 return TokError("stepping version number required, comma expected"); 4876 4877 if (ParseAsAbsoluteExpression(Stepping)) 4878 return TokError("invalid stepping version"); 4879 4880 if (!trySkipToken(AsmToken::Comma)) 4881 return TokError("vendor name required, comma expected"); 4882 4883 if (!parseString(VendorName, "invalid vendor name")) 4884 return true; 4885 4886 if (!trySkipToken(AsmToken::Comma)) 4887 return TokError("arch name required, comma expected"); 4888 4889 if (!parseString(ArchName, "invalid arch name")) 4890 return true; 4891 4892 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4893 VendorName, ArchName); 4894 return false; 4895 } 4896 4897 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4898 amd_kernel_code_t &Header) { 4899 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4900 // assembly for backwards compatibility. 
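// e.g. a line such as "max_scratch_backing_memory_byte_size = 0" (illustrative
// value) is consumed here and otherwise ignored.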
4901 if (ID == "max_scratch_backing_memory_byte_size") { 4902 Parser.eatToEndOfStatement(); 4903 return false; 4904 } 4905 4906 SmallString<40> ErrStr; 4907 raw_svector_ostream Err(ErrStr); 4908 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4909 return TokError(Err.str()); 4910 } 4911 Lex(); 4912 4913 if (ID == "enable_wavefront_size32") { 4914 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4915 if (!isGFX10Plus()) 4916 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4917 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4918 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4919 } else { 4920 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4921 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4922 } 4923 } 4924 4925 if (ID == "wavefront_size") { 4926 if (Header.wavefront_size == 5) { 4927 if (!isGFX10Plus()) 4928 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4929 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4930 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4931 } else if (Header.wavefront_size == 6) { 4932 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4933 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4934 } 4935 } 4936 4937 if (ID == "enable_wgp_mode") { 4938 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4939 !isGFX10Plus()) 4940 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4941 } 4942 4943 if (ID == "enable_mem_ordered") { 4944 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4945 !isGFX10Plus()) 4946 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4947 } 4948 4949 if (ID == "enable_fwd_progress") { 4950 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4951 !isGFX10Plus()) 4952 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4953 } 4954 4955 return false; 4956 } 4957 4958 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4959 amd_kernel_code_t Header; 4960 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4961 4962 while (true) { 4963 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4964 // will set the current token to EndOfStatement. 
4965 while(trySkipToken(AsmToken::EndOfStatement)); 4966 4967 StringRef ID; 4968 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4969 return true; 4970 4971 if (ID == ".end_amd_kernel_code_t") 4972 break; 4973 4974 if (ParseAMDKernelCodeTValue(ID, Header)) 4975 return true; 4976 } 4977 4978 getTargetStreamer().EmitAMDKernelCodeT(Header); 4979 4980 return false; 4981 } 4982 4983 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4984 StringRef KernelName; 4985 if (!parseId(KernelName, "expected symbol name")) 4986 return true; 4987 4988 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4989 ELF::STT_AMDGPU_HSA_KERNEL); 4990 4991 KernelScope.initialize(getContext()); 4992 return false; 4993 } 4994 4995 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4996 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4997 return Error(getLoc(), 4998 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4999 "architectures"); 5000 } 5001 5002 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5003 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5004 return Error(getParser().getTok().getLoc(), "target id must match options"); 5005 5006 getTargetStreamer().EmitISAVersion(); 5007 Lex(); 5008 5009 return false; 5010 } 5011 5012 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5013 const char *AssemblerDirectiveBegin; 5014 const char *AssemblerDirectiveEnd; 5015 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5016 isHsaAbiVersion3Or4(&getSTI()) 5017 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5018 HSAMD::V3::AssemblerDirectiveEnd) 5019 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5020 HSAMD::AssemblerDirectiveEnd); 5021 5022 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5023 return Error(getLoc(), 5024 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5025 "not available on non-amdhsa OSes")).str()); 5026 } 5027 5028 std::string HSAMetadataString; 5029 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5030 HSAMetadataString)) 5031 return true; 5032 5033 if (isHsaAbiVersion3Or4(&getSTI())) { 5034 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5035 return Error(getLoc(), "invalid HSA metadata"); 5036 } else { 5037 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5038 return Error(getLoc(), "invalid HSA metadata"); 5039 } 5040 5041 return false; 5042 } 5043 5044 /// Common code to parse out a block of text (typically YAML) between start and 5045 /// end directives. 
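/// For example, everything between a .amdgpu_metadata and .end_amdgpu_metadata
/// pair (one such begin/end pair) is accumulated into CollectString.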
5046 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5047 const char *AssemblerDirectiveEnd, 5048 std::string &CollectString) { 5049 5050 raw_string_ostream CollectStream(CollectString); 5051 5052 getLexer().setSkipSpace(false); 5053 5054 bool FoundEnd = false; 5055 while (!isToken(AsmToken::Eof)) { 5056 while (isToken(AsmToken::Space)) { 5057 CollectStream << getTokenStr(); 5058 Lex(); 5059 } 5060 5061 if (trySkipId(AssemblerDirectiveEnd)) { 5062 FoundEnd = true; 5063 break; 5064 } 5065 5066 CollectStream << Parser.parseStringToEndOfStatement() 5067 << getContext().getAsmInfo()->getSeparatorString(); 5068 5069 Parser.eatToEndOfStatement(); 5070 } 5071 5072 getLexer().setSkipSpace(true); 5073 5074 if (isToken(AsmToken::Eof) && !FoundEnd) { 5075 return TokError(Twine("expected directive ") + 5076 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5077 } 5078 5079 CollectStream.flush(); 5080 return false; 5081 } 5082 5083 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5084 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5085 std::string String; 5086 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5087 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5088 return true; 5089 5090 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5091 if (!PALMetadata->setFromString(String)) 5092 return Error(getLoc(), "invalid PAL metadata"); 5093 return false; 5094 } 5095 5096 /// Parse the assembler directive for old linear-format PAL metadata. 5097 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5098 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5099 return Error(getLoc(), 5100 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5101 "not available on non-amdpal OSes")).str()); 5102 } 5103 5104 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5105 PALMetadata->setLegacy(); 5106 for (;;) { 5107 uint32_t Key, Value; 5108 if (ParseAsAbsoluteExpression(Key)) { 5109 return TokError(Twine("invalid value in ") + 5110 Twine(PALMD::AssemblerDirective)); 5111 } 5112 if (!trySkipToken(AsmToken::Comma)) { 5113 return TokError(Twine("expected an even number of values in ") + 5114 Twine(PALMD::AssemblerDirective)); 5115 } 5116 if (ParseAsAbsoluteExpression(Value)) { 5117 return TokError(Twine("invalid value in ") + 5118 Twine(PALMD::AssemblerDirective)); 5119 } 5120 PALMetadata->setRegister(Key, Value); 5121 if (!trySkipToken(AsmToken::Comma)) 5122 break; 5123 } 5124 return false; 5125 } 5126 5127 /// ParseDirectiveAMDGPULDS 5128 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5129 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5130 if (getParser().checkForValidSection()) 5131 return true; 5132 5133 StringRef Name; 5134 SMLoc NameLoc = getLoc(); 5135 if (getParser().parseIdentifier(Name)) 5136 return TokError("expected identifier in directive"); 5137 5138 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5139 if (parseToken(AsmToken::Comma, "expected ','")) 5140 return true; 5141 5142 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5143 5144 int64_t Size; 5145 SMLoc SizeLoc = getLoc(); 5146 if (getParser().parseAbsoluteExpression(Size)) 5147 return true; 5148 if (Size < 0) 5149 return Error(SizeLoc, "size must be non-negative"); 5150 if (Size > LocalMemorySize) 5151 return Error(SizeLoc, "size is too large"); 5152 5153 int64_t Alignment = 4; 5154 if (trySkipToken(AsmToken::Comma)) { 5155 SMLoc AlignLoc = getLoc(); 5156 if 
(getParser().parseAbsoluteExpression(Alignment)) 5157 return true; 5158 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5159 return Error(AlignLoc, "alignment must be a power of two"); 5160 5161 // Alignment larger than the size of LDS is possible in theory, as long 5162 // as the linker manages to place to symbol at address 0, but we do want 5163 // to make sure the alignment fits nicely into a 32-bit integer. 5164 if (Alignment >= 1u << 31) 5165 return Error(AlignLoc, "alignment is too large"); 5166 } 5167 5168 if (parseToken(AsmToken::EndOfStatement, 5169 "unexpected token in '.amdgpu_lds' directive")) 5170 return true; 5171 5172 Symbol->redefineIfPossible(); 5173 if (!Symbol->isUndefined()) 5174 return Error(NameLoc, "invalid symbol redefinition"); 5175 5176 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5177 return false; 5178 } 5179 5180 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5181 StringRef IDVal = DirectiveID.getString(); 5182 5183 if (isHsaAbiVersion3Or4(&getSTI())) { 5184 if (IDVal == ".amdhsa_kernel") 5185 return ParseDirectiveAMDHSAKernel(); 5186 5187 // TODO: Restructure/combine with PAL metadata directive. 5188 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5189 return ParseDirectiveHSAMetadata(); 5190 } else { 5191 if (IDVal == ".hsa_code_object_version") 5192 return ParseDirectiveHSACodeObjectVersion(); 5193 5194 if (IDVal == ".hsa_code_object_isa") 5195 return ParseDirectiveHSACodeObjectISA(); 5196 5197 if (IDVal == ".amd_kernel_code_t") 5198 return ParseDirectiveAMDKernelCodeT(); 5199 5200 if (IDVal == ".amdgpu_hsa_kernel") 5201 return ParseDirectiveAMDGPUHsaKernel(); 5202 5203 if (IDVal == ".amd_amdgpu_isa") 5204 return ParseDirectiveISAVersion(); 5205 5206 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5207 return ParseDirectiveHSAMetadata(); 5208 } 5209 5210 if (IDVal == ".amdgcn_target") 5211 return ParseDirectiveAMDGCNTarget(); 5212 5213 if (IDVal == ".amdgpu_lds") 5214 return ParseDirectiveAMDGPULDS(); 5215 5216 if (IDVal == PALMD::AssemblerDirectiveBegin) 5217 return ParseDirectivePALMetadataBegin(); 5218 5219 if (IDVal == PALMD::AssemblerDirective) 5220 return ParseDirectivePALMetadata(); 5221 5222 return true; 5223 } 5224 5225 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5226 unsigned RegNo) { 5227 5228 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5229 R.isValid(); ++R) { 5230 if (*R == RegNo) 5231 return isGFX9Plus(); 5232 } 5233 5234 // GFX10 has 2 more SGPRs 104 and 105. 5235 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5236 R.isValid(); ++R) { 5237 if (*R == RegNo) 5238 return hasSGPR104_SGPR105(); 5239 } 5240 5241 switch (RegNo) { 5242 case AMDGPU::SRC_SHARED_BASE: 5243 case AMDGPU::SRC_SHARED_LIMIT: 5244 case AMDGPU::SRC_PRIVATE_BASE: 5245 case AMDGPU::SRC_PRIVATE_LIMIT: 5246 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5247 return isGFX9Plus(); 5248 case AMDGPU::TBA: 5249 case AMDGPU::TBA_LO: 5250 case AMDGPU::TBA_HI: 5251 case AMDGPU::TMA: 5252 case AMDGPU::TMA_LO: 5253 case AMDGPU::TMA_HI: 5254 return !isGFX9Plus(); 5255 case AMDGPU::XNACK_MASK: 5256 case AMDGPU::XNACK_MASK_LO: 5257 case AMDGPU::XNACK_MASK_HI: 5258 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5259 case AMDGPU::SGPR_NULL: 5260 return isGFX10Plus(); 5261 default: 5262 break; 5263 } 5264 5265 if (isCI()) 5266 return true; 5267 5268 if (isSI() || isGFX10Plus()) { 5269 // No flat_scr on SI. 
5270 // On GFX10 flat scratch is not a valid register operand and can only be 5271 // accessed with s_setreg/s_getreg. 5272 switch (RegNo) { 5273 case AMDGPU::FLAT_SCR: 5274 case AMDGPU::FLAT_SCR_LO: 5275 case AMDGPU::FLAT_SCR_HI: 5276 return false; 5277 default: 5278 return true; 5279 } 5280 } 5281 5282 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5283 // SI/CI have. 5284 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5285 R.isValid(); ++R) { 5286 if (*R == RegNo) 5287 return hasSGPR102_SGPR103(); 5288 } 5289 5290 return true; 5291 } 5292 5293 OperandMatchResultTy 5294 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5295 OperandMode Mode) { 5296 // Try to parse with a custom parser 5297 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5298 5299 // If we successfully parsed the operand or if there was an error parsing, 5300 // we are done. 5301 // 5302 // If we are parsing after we reach EndOfStatement then this means we 5303 // are appending default values to the Operands list. This is only done 5304 // by custom parser, so we shouldn't continue on to the generic parsing. 5305 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5306 isToken(AsmToken::EndOfStatement)) 5307 return ResTy; 5308 5309 SMLoc RBraceLoc; 5310 SMLoc LBraceLoc = getLoc(); 5311 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5312 unsigned Prefix = Operands.size(); 5313 5314 for (;;) { 5315 auto Loc = getLoc(); 5316 ResTy = parseReg(Operands); 5317 if (ResTy == MatchOperand_NoMatch) 5318 Error(Loc, "expected a register"); 5319 if (ResTy != MatchOperand_Success) 5320 return MatchOperand_ParseFail; 5321 5322 RBraceLoc = getLoc(); 5323 if (trySkipToken(AsmToken::RBrac)) 5324 break; 5325 5326 if (!skipToken(AsmToken::Comma, 5327 "expected a comma or a closing square bracket")) { 5328 return MatchOperand_ParseFail; 5329 } 5330 } 5331 5332 if (Operands.size() - Prefix > 1) { 5333 Operands.insert(Operands.begin() + Prefix, 5334 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5335 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5336 } 5337 5338 return MatchOperand_Success; 5339 } 5340 5341 return parseRegOrImm(Operands); 5342 } 5343 5344 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5345 // Clear any forced encodings from the previous instruction.
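// (Illustrative examples, assuming standard AMDGPU mnemonics: "v_add_f32_e32"
// forces the 32-bit encoding, "v_add_f32_e64" the 64-bit encoding,
// "v_mov_b32_dpp" the DPP form and "v_mov_b32_sdwa" the SDWA form; the suffix
// is stripped from the returned mnemonic.)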
5346 setForcedEncodingSize(0); 5347 setForcedDPP(false); 5348 setForcedSDWA(false); 5349 5350 if (Name.endswith("_e64")) { 5351 setForcedEncodingSize(64); 5352 return Name.substr(0, Name.size() - 4); 5353 } else if (Name.endswith("_e32")) { 5354 setForcedEncodingSize(32); 5355 return Name.substr(0, Name.size() - 4); 5356 } else if (Name.endswith("_dpp")) { 5357 setForcedDPP(true); 5358 return Name.substr(0, Name.size() - 4); 5359 } else if (Name.endswith("_sdwa")) { 5360 setForcedSDWA(true); 5361 return Name.substr(0, Name.size() - 5); 5362 } 5363 return Name; 5364 } 5365 5366 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5367 StringRef Name, 5368 SMLoc NameLoc, OperandVector &Operands) { 5369 // Add the instruction mnemonic 5370 Name = parseMnemonicSuffix(Name); 5371 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5372 5373 bool IsMIMG = Name.startswith("image_"); 5374 5375 while (!trySkipToken(AsmToken::EndOfStatement)) { 5376 OperandMode Mode = OperandMode_Default; 5377 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5378 Mode = OperandMode_NSA; 5379 CPolSeen = 0; 5380 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5381 5382 if (Res != MatchOperand_Success) { 5383 checkUnsupportedInstruction(Name, NameLoc); 5384 if (!Parser.hasPendingError()) { 5385 // FIXME: use real operand location rather than the current location. 5386 StringRef Msg = 5387 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5388 "not a valid operand."; 5389 Error(getLoc(), Msg); 5390 } 5391 while (!trySkipToken(AsmToken::EndOfStatement)) { 5392 lex(); 5393 } 5394 return true; 5395 } 5396 5397 // Eat the comma or space if there is one. 5398 trySkipToken(AsmToken::Comma); 5399 } 5400 5401 return false; 5402 } 5403 5404 //===----------------------------------------------------------------------===// 5405 // Utility functions 5406 //===----------------------------------------------------------------------===// 5407 5408 OperandMatchResultTy 5409 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5410 5411 if (!trySkipId(Prefix, AsmToken::Colon)) 5412 return MatchOperand_NoMatch; 5413 5414 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5415 } 5416 5417 OperandMatchResultTy 5418 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5419 AMDGPUOperand::ImmTy ImmTy, 5420 bool (*ConvertResult)(int64_t&)) { 5421 SMLoc S = getLoc(); 5422 int64_t Value = 0; 5423 5424 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5425 if (Res != MatchOperand_Success) 5426 return Res; 5427 5428 if (ConvertResult && !ConvertResult(Value)) { 5429 Error(S, "invalid " + StringRef(Prefix) + " value."); 5430 } 5431 5432 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5433 return MatchOperand_Success; 5434 } 5435 5436 OperandMatchResultTy 5437 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5438 OperandVector &Operands, 5439 AMDGPUOperand::ImmTy ImmTy, 5440 bool (*ConvertResult)(int64_t&)) { 5441 SMLoc S = getLoc(); 5442 if (!trySkipId(Prefix, AsmToken::Colon)) 5443 return MatchOperand_NoMatch; 5444 5445 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5446 return MatchOperand_ParseFail; 5447 5448 unsigned Val = 0; 5449 const unsigned MaxSize = 4; 5450 5451 // FIXME: How to verify the number of elements matches the number of src 5452 // operands? 
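// Illustrative example of the syntax accepted below, assuming an op_sel-style
// operand: "op_sel:[0,1,1,0]". Each element must be 0 or 1 and is packed into
// bit I of Val; at most MaxSize (4) elements are accepted.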
5453 for (int I = 0; ; ++I) { 5454 int64_t Op; 5455 SMLoc Loc = getLoc(); 5456 if (!parseExpr(Op)) 5457 return MatchOperand_ParseFail; 5458 5459 if (Op != 0 && Op != 1) { 5460 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5461 return MatchOperand_ParseFail; 5462 } 5463 5464 Val |= (Op << I); 5465 5466 if (trySkipToken(AsmToken::RBrac)) 5467 break; 5468 5469 if (I + 1 == MaxSize) { 5470 Error(getLoc(), "expected a closing square bracket"); 5471 return MatchOperand_ParseFail; 5472 } 5473 5474 if (!skipToken(AsmToken::Comma, "expected a comma")) 5475 return MatchOperand_ParseFail; 5476 } 5477 5478 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5479 return MatchOperand_Success; 5480 } 5481 5482 OperandMatchResultTy 5483 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5484 AMDGPUOperand::ImmTy ImmTy) { 5485 int64_t Bit; 5486 SMLoc S = getLoc(); 5487 5488 if (trySkipId(Name)) { 5489 Bit = 1; 5490 } else if (trySkipId("no", Name)) { 5491 Bit = 0; 5492 } else { 5493 return MatchOperand_NoMatch; 5494 } 5495 5496 if (Name == "r128" && !hasMIMG_R128()) { 5497 Error(S, "r128 modifier is not supported on this GPU"); 5498 return MatchOperand_ParseFail; 5499 } 5500 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5501 Error(S, "a16 modifier is not supported on this GPU"); 5502 return MatchOperand_ParseFail; 5503 } 5504 5505 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5506 ImmTy = AMDGPUOperand::ImmTyR128A16; 5507 5508 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5509 return MatchOperand_Success; 5510 } 5511 5512 OperandMatchResultTy 5513 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5514 unsigned CPolOn = 0; 5515 unsigned CPolOff = 0; 5516 SMLoc S = getLoc(); 5517 5518 if (trySkipId("glc")) 5519 CPolOn = AMDGPU::CPol::GLC; 5520 else if (trySkipId("noglc")) 5521 CPolOff = AMDGPU::CPol::GLC; 5522 else if (trySkipId("slc")) 5523 CPolOn = AMDGPU::CPol::SLC; 5524 else if (trySkipId("noslc")) 5525 CPolOff = AMDGPU::CPol::SLC; 5526 else if (trySkipId("dlc")) 5527 CPolOn = AMDGPU::CPol::DLC; 5528 else if (trySkipId("nodlc")) 5529 CPolOff = AMDGPU::CPol::DLC; 5530 else if (trySkipId("scc")) 5531 CPolOn = AMDGPU::CPol::SCC; 5532 else if (trySkipId("noscc")) 5533 CPolOff = AMDGPU::CPol::SCC; 5534 else 5535 return MatchOperand_NoMatch; 5536 5537 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5538 Error(S, "dlc modifier is not supported on this GPU"); 5539 return MatchOperand_ParseFail; 5540 } 5541 5542 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5543 Error(S, "scc modifier is not supported on this GPU"); 5544 return MatchOperand_ParseFail; 5545 } 5546 5547 if (CPolSeen & (CPolOn | CPolOff)) { 5548 Error(S, "duplicate cache policy modifier"); 5549 return MatchOperand_ParseFail; 5550 } 5551 5552 CPolSeen |= (CPolOn | CPolOff); 5553 5554 for (unsigned I = 1; I != Operands.size(); ++I) { 5555 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5556 if (Op.isCPol()) { 5557 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5558 return MatchOperand_Success; 5559 } 5560 } 5561 5562 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5563 AMDGPUOperand::ImmTyCPol)); 5564 5565 return MatchOperand_Success; 5566 } 5567 5568 static void addOptionalImmOperand( 5569 MCInst& Inst, const OperandVector& Operands, 5570 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5571 AMDGPUOperand::ImmTy ImmT, 5572 int64_t Default = 0) { 5573 auto i = OptionalIdx.find(ImmT); 5574 if (i != OptionalIdx.end()) { 
5575 unsigned Idx = i->second; 5576 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5577 } else { 5578 Inst.addOperand(MCOperand::createImm(Default)); 5579 } 5580 } 5581 5582 OperandMatchResultTy 5583 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5584 StringRef &Value, 5585 SMLoc &StringLoc) { 5586 if (!trySkipId(Prefix, AsmToken::Colon)) 5587 return MatchOperand_NoMatch; 5588 5589 StringLoc = getLoc(); 5590 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5591 : MatchOperand_ParseFail; 5592 } 5593 5594 //===----------------------------------------------------------------------===// 5595 // MTBUF format 5596 //===----------------------------------------------------------------------===// 5597 5598 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5599 int64_t MaxVal, 5600 int64_t &Fmt) { 5601 int64_t Val; 5602 SMLoc Loc = getLoc(); 5603 5604 auto Res = parseIntWithPrefix(Pref, Val); 5605 if (Res == MatchOperand_ParseFail) 5606 return false; 5607 if (Res == MatchOperand_NoMatch) 5608 return true; 5609 5610 if (Val < 0 || Val > MaxVal) { 5611 Error(Loc, Twine("out of range ", StringRef(Pref))); 5612 return false; 5613 } 5614 5615 Fmt = Val; 5616 return true; 5617 } 5618 5619 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5620 // values to live in a joint format operand in the MCInst encoding. 5621 OperandMatchResultTy 5622 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5623 using namespace llvm::AMDGPU::MTBUFFormat; 5624 5625 int64_t Dfmt = DFMT_UNDEF; 5626 int64_t Nfmt = NFMT_UNDEF; 5627 5628 // dfmt and nfmt can appear in either order, and each is optional. 5629 for (int I = 0; I < 2; ++I) { 5630 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5631 return MatchOperand_ParseFail; 5632 5633 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5634 return MatchOperand_ParseFail; 5635 } 5636 // Skip optional comma between dfmt/nfmt 5637 // but guard against 2 commas following each other. 5638 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5639 !peekToken().is(AsmToken::Comma)) { 5640 trySkipToken(AsmToken::Comma); 5641 } 5642 } 5643 5644 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5645 return MatchOperand_NoMatch; 5646 5647 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5648 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5649 5650 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5651 return MatchOperand_Success; 5652 } 5653 5654 OperandMatchResultTy 5655 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5656 using namespace llvm::AMDGPU::MTBUFFormat; 5657 5658 int64_t Fmt = UFMT_UNDEF; 5659 5660 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5661 return MatchOperand_ParseFail; 5662 5663 if (Fmt == UFMT_UNDEF) 5664 return MatchOperand_NoMatch; 5665 5666 Format = Fmt; 5667 return MatchOperand_Success; 5668 } 5669 5670 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5671 int64_t &Nfmt, 5672 StringRef FormatStr, 5673 SMLoc Loc) { 5674 using namespace llvm::AMDGPU::MTBUFFormat; 5675 int64_t Format; 5676 5677 Format = getDfmt(FormatStr); 5678 if (Format != DFMT_UNDEF) { 5679 Dfmt = Format; 5680 return true; 5681 } 5682 5683 Format = getNfmt(FormatStr, getSTI()); 5684 if (Format != NFMT_UNDEF) { 5685 Nfmt = Format; 5686 return true; 5687 } 5688 5689 Error(Loc, "unsupported format"); 5690 return false; 5691 } 5692 5693 OperandMatchResultTy 5694 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5695 SMLoc FormatLoc, 5696 int64_t &Format) { 5697 using namespace llvm::AMDGPU::MTBUFFormat; 5698 5699 int64_t Dfmt = DFMT_UNDEF; 5700 int64_t Nfmt = NFMT_UNDEF; 5701 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5702 return MatchOperand_ParseFail; 5703 5704 if (trySkipToken(AsmToken::Comma)) { 5705 StringRef Str; 5706 SMLoc Loc = getLoc(); 5707 if (!parseId(Str, "expected a format string") || 5708 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5709 return MatchOperand_ParseFail; 5710 } 5711 if (Dfmt == DFMT_UNDEF) { 5712 Error(Loc, "duplicate numeric format"); 5713 return MatchOperand_ParseFail; 5714 } else if (Nfmt == NFMT_UNDEF) { 5715 Error(Loc, "duplicate data format"); 5716 return MatchOperand_ParseFail; 5717 } 5718 } 5719 5720 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5721 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5722 5723 if (isGFX10Plus()) { 5724 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5725 if (Ufmt == UFMT_UNDEF) { 5726 Error(FormatLoc, "unsupported format"); 5727 return MatchOperand_ParseFail; 5728 } 5729 Format = Ufmt; 5730 } else { 5731 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5732 } 5733 5734 return MatchOperand_Success; 5735 } 5736 5737 OperandMatchResultTy 5738 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5739 SMLoc Loc, 5740 int64_t &Format) { 5741 using namespace llvm::AMDGPU::MTBUFFormat; 5742 5743 auto Id = getUnifiedFormat(FormatStr); 5744 if (Id == UFMT_UNDEF) 5745 return MatchOperand_NoMatch; 5746 5747 if (!isGFX10Plus()) { 5748 Error(Loc, "unified format is not supported on this GPU"); 5749 return MatchOperand_ParseFail; 5750 } 5751 5752 Format = Id; 5753 return MatchOperand_Success; 5754 } 5755 5756 OperandMatchResultTy 5757 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5758 using namespace llvm::AMDGPU::MTBUFFormat; 5759 SMLoc Loc = getLoc(); 5760 5761 if (!parseExpr(Format)) 5762 return MatchOperand_ParseFail; 5763 if (!isValidFormatEncoding(Format, getSTI())) { 5764 Error(Loc, "out of range format"); 5765 return MatchOperand_ParseFail; 5766 } 5767 5768 return MatchOperand_Success; 5769 } 5770 5771 OperandMatchResultTy 5772 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5773 using namespace llvm::AMDGPU::MTBUFFormat; 5774 5775 if (!trySkipId("format", AsmToken::Colon)) 5776 return MatchOperand_NoMatch; 5777 5778 if (trySkipToken(AsmToken::LBrac)) { 5779 StringRef FormatStr; 5780 SMLoc Loc = getLoc(); 5781 if (!parseId(FormatStr, "expected a format string")) 5782 return MatchOperand_ParseFail; 5783 5784 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5785 if (Res == MatchOperand_NoMatch) 5786 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5787 if (Res != MatchOperand_Success) 5788 return Res; 5789 5790 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5791 return MatchOperand_ParseFail; 5792 5793 return MatchOperand_Success; 5794 } 5795 5796 return parseNumericFormat(Format); 5797 } 5798 5799 OperandMatchResultTy 5800 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5801 using namespace llvm::AMDGPU::MTBUFFormat; 5802 5803 int64_t Format = getDefaultFormatEncoding(getSTI()); 5804 OperandMatchResultTy Res; 5805 SMLoc Loc = getLoc(); 5806 5807 // Parse legacy format syntax. 5808 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5809 if (Res == MatchOperand_ParseFail) 5810 return Res; 5811 5812 bool FormatFound = (Res == MatchOperand_Success); 5813 5814 Operands.push_back( 5815 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5816 5817 if (FormatFound) 5818 trySkipToken(AsmToken::Comma); 5819 5820 if (isToken(AsmToken::EndOfStatement)) { 5821 // We are expecting an soffset operand, 5822 // but let matcher handle the error. 5823 return MatchOperand_Success; 5824 } 5825 5826 // Parse soffset. 
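// Illustrative examples of the two accepted orderings (format names are
// target-dependent and shown here only as an assumption):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:15, nfmt:2, s1            ; format first
//   tbuffer_load_format_x v0, off, s[0:3], s1 format:[BUF_FMT_32_FLOAT]   ; soffset first
// In the second form FormatFound is false at this point, and the default
// format operand pushed above is patched once the trailing format is parsed.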
5827 Res = parseRegOrImm(Operands); 5828 if (Res != MatchOperand_Success) 5829 return Res; 5830 5831 trySkipToken(AsmToken::Comma); 5832 5833 if (!FormatFound) { 5834 Res = parseSymbolicOrNumericFormat(Format); 5835 if (Res == MatchOperand_ParseFail) 5836 return Res; 5837 if (Res == MatchOperand_Success) { 5838 auto Size = Operands.size(); 5839 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5840 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5841 Op.setImm(Format); 5842 } 5843 return MatchOperand_Success; 5844 } 5845 5846 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5847 Error(getLoc(), "duplicate format"); 5848 return MatchOperand_ParseFail; 5849 } 5850 return MatchOperand_Success; 5851 } 5852 5853 //===----------------------------------------------------------------------===// 5854 // ds 5855 //===----------------------------------------------------------------------===// 5856 5857 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5858 const OperandVector &Operands) { 5859 OptionalImmIndexMap OptionalIdx; 5860 5861 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5862 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5863 5864 // Add the register arguments 5865 if (Op.isReg()) { 5866 Op.addRegOperands(Inst, 1); 5867 continue; 5868 } 5869 5870 // Handle optional arguments 5871 OptionalIdx[Op.getImmTy()] = i; 5872 } 5873 5874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5877 5878 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5879 } 5880 5881 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5882 bool IsGdsHardcoded) { 5883 OptionalImmIndexMap OptionalIdx; 5884 5885 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5886 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5887 5888 // Add the register arguments 5889 if (Op.isReg()) { 5890 Op.addRegOperands(Inst, 1); 5891 continue; 5892 } 5893 5894 if (Op.isToken() && Op.getToken() == "gds") { 5895 IsGdsHardcoded = true; 5896 continue; 5897 } 5898 5899 // Handle optional arguments 5900 OptionalIdx[Op.getImmTy()] = i; 5901 } 5902 5903 AMDGPUOperand::ImmTy OffsetType = 5904 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5905 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5906 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5907 AMDGPUOperand::ImmTyOffset; 5908 5909 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5910 5911 if (!IsGdsHardcoded) { 5912 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5913 } 5914 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5915 } 5916 5917 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5918 OptionalImmIndexMap OptionalIdx; 5919 5920 unsigned OperandIdx[4]; 5921 unsigned EnMask = 0; 5922 int SrcIdx = 0; 5923 5924 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5925 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5926 5927 // Add the register arguments 5928 if (Op.isReg()) { 5929 assert(SrcIdx < 4); 5930 OperandIdx[SrcIdx] = Inst.size(); 5931 Op.addRegOperands(Inst, 1); 5932 ++SrcIdx; 5933 continue; 5934 } 5935 5936 if (Op.isOff()) { 5937 assert(SrcIdx < 4); 5938 OperandIdx[SrcIdx] = Inst.size(); 5939 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5940 ++SrcIdx; 5941 continue; 5942 } 5943 5944 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5945 Op.addImmOperands(Inst, 1); 5946 continue; 5947 } 5948 5949 if (Op.isToken() && Op.getToken() == "done") 5950 continue; 5951 5952 // Handle optional arguments 5953 OptionalIdx[Op.getImmTy()] = i; 5954 } 5955 5956 assert(SrcIdx == 4); 5957 5958 bool Compr = false; 5959 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5960 Compr = true; 5961 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5962 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5963 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5964 } 5965 5966 for (auto i = 0; i < SrcIdx; ++i) { 5967 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5968 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5969 } 5970 } 5971 5972 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5973 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5974 5975 Inst.addOperand(MCOperand::createImm(EnMask)); 5976 } 5977 5978 //===----------------------------------------------------------------------===// 5979 // s_waitcnt 5980 //===----------------------------------------------------------------------===// 5981 5982 static bool 5983 encodeCnt( 5984 const AMDGPU::IsaVersion ISA, 5985 int64_t &IntVal, 5986 int64_t CntVal, 5987 bool Saturate, 5988 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5989 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5990 { 5991 bool Failed = false; 5992 5993 IntVal = encode(ISA, IntVal, CntVal); 5994 if (CntVal != decode(ISA, IntVal)) { 5995 if (Saturate) { 5996 IntVal = encode(ISA, IntVal, -1); 5997 } else { 5998 Failed = true; 5999 } 6000 } 6001 return Failed; 6002 } 6003 6004 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6005 6006 SMLoc CntLoc = getLoc(); 6007 StringRef CntName = getTokenStr(); 6008 6009 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6010 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6011 return false; 6012 6013 int64_t CntVal; 6014 SMLoc ValLoc = getLoc(); 6015 if (!parseExpr(CntVal)) 6016 return false; 6017 6018 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6019 6020 bool Failed = true; 6021 bool Sat = CntName.endswith("_sat"); 6022 6023 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6024 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6025 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6026 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6027 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6028 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6029 } else { 6030 Error(CntLoc, "invalid counter name " + CntName); 6031 return false; 6032 } 6033 6034 if (Failed) { 6035 Error(ValLoc, "too large value for " + CntName); 6036 return false; 6037 } 6038 6039 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6040 return false; 6041 6042 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6043 if (isToken(AsmToken::EndOfStatement)) { 6044 Error(getLoc(), "expected a counter name"); 6045 return false; 6046 } 6047 } 6048 6049 return true; 6050 } 6051 6052 OperandMatchResultTy 6053 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6054 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6055 int64_t Waitcnt = getWaitcntBitMask(ISA); 6056 SMLoc S = getLoc(); 6057 6058 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6059 while (!isToken(AsmToken::EndOfStatement)) { 6060 if (!parseCnt(Waitcnt)) 6061 return MatchOperand_ParseFail; 6062 } 6063 } else { 6064 if (!parseExpr(Waitcnt)) 6065 return MatchOperand_ParseFail; 6066 } 6067 6068 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6069 return MatchOperand_Success; 6070 } 6071 6072 bool 6073 AMDGPUOperand::isSWaitCnt() const { 6074 return isImm(); 6075 } 6076 6077 //===----------------------------------------------------------------------===// 6078 // hwreg 6079 //===----------------------------------------------------------------------===// 6080 6081 bool 6082 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6083 OperandInfoTy &Offset, 6084 
OperandInfoTy &Width) { 6085 using namespace llvm::AMDGPU::Hwreg; 6086 6087 // The register may be specified by name or using a numeric code 6088 HwReg.Loc = getLoc(); 6089 if (isToken(AsmToken::Identifier) && 6090 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6091 HwReg.IsSymbolic = true; 6092 lex(); // skip register name 6093 } else if (!parseExpr(HwReg.Id, "a register name")) { 6094 return false; 6095 } 6096 6097 if (trySkipToken(AsmToken::RParen)) 6098 return true; 6099 6100 // parse optional params 6101 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6102 return false; 6103 6104 Offset.Loc = getLoc(); 6105 if (!parseExpr(Offset.Id)) 6106 return false; 6107 6108 if (!skipToken(AsmToken::Comma, "expected a comma")) 6109 return false; 6110 6111 Width.Loc = getLoc(); 6112 return parseExpr(Width.Id) && 6113 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6114 } 6115 6116 bool 6117 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6118 const OperandInfoTy &Offset, 6119 const OperandInfoTy &Width) { 6120 6121 using namespace llvm::AMDGPU::Hwreg; 6122 6123 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6124 Error(HwReg.Loc, 6125 "specified hardware register is not supported on this GPU"); 6126 return false; 6127 } 6128 if (!isValidHwreg(HwReg.Id)) { 6129 Error(HwReg.Loc, 6130 "invalid code of hardware register: only 6-bit values are legal"); 6131 return false; 6132 } 6133 if (!isValidHwregOffset(Offset.Id)) { 6134 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6135 return false; 6136 } 6137 if (!isValidHwregWidth(Width.Id)) { 6138 Error(Width.Loc, 6139 "invalid bitfield width: only values from 1 to 32 are legal"); 6140 return false; 6141 } 6142 return true; 6143 } 6144 6145 OperandMatchResultTy 6146 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6147 using namespace llvm::AMDGPU::Hwreg; 6148 6149 int64_t ImmVal = 0; 6150 SMLoc Loc = getLoc(); 6151 6152 if (trySkipId("hwreg", AsmToken::LParen)) { 6153 OperandInfoTy HwReg(ID_UNKNOWN_); 6154 OperandInfoTy Offset(OFFSET_DEFAULT_); 6155 OperandInfoTy Width(WIDTH_DEFAULT_); 6156 if (parseHwregBody(HwReg, Offset, Width) && 6157 validateHwreg(HwReg, Offset, Width)) { 6158 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6159 } else { 6160 return MatchOperand_ParseFail; 6161 } 6162 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6163 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6164 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6165 return MatchOperand_ParseFail; 6166 } 6167 } else { 6168 return MatchOperand_ParseFail; 6169 } 6170 6171 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6172 return MatchOperand_Success; 6173 } 6174 6175 bool AMDGPUOperand::isHwreg() const { 6176 return isImmTy(ImmTyHwreg); 6177 } 6178 6179 //===----------------------------------------------------------------------===// 6180 // sendmsg 6181 //===----------------------------------------------------------------------===// 6182 6183 bool 6184 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6185 OperandInfoTy &Op, 6186 OperandInfoTy &Stream) { 6187 using namespace llvm::AMDGPU::SendMsg; 6188 6189 Msg.Loc = getLoc(); 6190 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6191 Msg.IsSymbolic = true; 6192 lex(); // skip message name 6193 } else if (!parseExpr(Msg.Id, "a message name")) { 6194 return false; 6195 } 6196 6197 if (trySkipToken(AsmToken::Comma)) { 6198 Op.IsDefined = true; 
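// An operation name or expression follows, e.g. (illustrative)
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)", where GS_OP_EMIT is the operation and
// the optional trailing value is the stream id parsed further below.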
6199 Op.Loc = getLoc(); 6200 if (isToken(AsmToken::Identifier) && 6201 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6202 lex(); // skip operation name 6203 } else if (!parseExpr(Op.Id, "an operation name")) { 6204 return false; 6205 } 6206 6207 if (trySkipToken(AsmToken::Comma)) { 6208 Stream.IsDefined = true; 6209 Stream.Loc = getLoc(); 6210 if (!parseExpr(Stream.Id)) 6211 return false; 6212 } 6213 } 6214 6215 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6216 } 6217 6218 bool 6219 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6220 const OperandInfoTy &Op, 6221 const OperandInfoTy &Stream) { 6222 using namespace llvm::AMDGPU::SendMsg; 6223 6224 // Validation strictness depends on whether the message is specified 6225 // in a symbolic or in a numeric form. In the latter case 6226 // only the encoding possibility is checked. 6227 bool Strict = Msg.IsSymbolic; 6228 6229 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6230 Error(Msg.Loc, "invalid message id"); 6231 return false; 6232 } 6233 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6234 if (Op.IsDefined) { 6235 Error(Op.Loc, "message does not support operations"); 6236 } else { 6237 Error(Msg.Loc, "missing message operation"); 6238 } 6239 return false; 6240 } 6241 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6242 Error(Op.Loc, "invalid operation id"); 6243 return false; 6244 } 6245 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6246 Error(Stream.Loc, "message operation does not support streams"); 6247 return false; 6248 } 6249 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6250 Error(Stream.Loc, "invalid message stream id"); 6251 return false; 6252 } 6253 return true; 6254 } 6255 6256 OperandMatchResultTy 6257 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6258 using namespace llvm::AMDGPU::SendMsg; 6259 6260 int64_t ImmVal = 0; 6261 SMLoc Loc = getLoc(); 6262 6263 if (trySkipId("sendmsg", AsmToken::LParen)) { 6264 OperandInfoTy Msg(ID_UNKNOWN_); 6265 OperandInfoTy Op(OP_NONE_); 6266 OperandInfoTy Stream(STREAM_ID_NONE_); 6267 if (parseSendMsgBody(Msg, Op, Stream) && 6268 validateSendMsg(Msg, Op, Stream)) { 6269 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6270 } else { 6271 return MatchOperand_ParseFail; 6272 } 6273 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6274 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6275 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6276 return MatchOperand_ParseFail; 6277 } 6278 } else { 6279 return MatchOperand_ParseFail; 6280 } 6281 6282 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6283 return MatchOperand_Success; 6284 } 6285 6286 bool AMDGPUOperand::isSendMsg() const { 6287 return isImmTy(ImmTySendMsg); 6288 } 6289 6290 //===----------------------------------------------------------------------===// 6291 // v_interp 6292 //===----------------------------------------------------------------------===// 6293 6294 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6295 StringRef Str; 6296 SMLoc S = getLoc(); 6297 6298 if (!parseId(Str)) 6299 return MatchOperand_NoMatch; 6300 6301 int Slot = StringSwitch<int>(Str) 6302 .Case("p10", 0) 6303 .Case("p20", 1) 6304 .Case("p0", 2) 6305 .Default(-1); 6306 6307 if (Slot == -1) { 6308 Error(S, "invalid interpolation slot"); 6309 return MatchOperand_ParseFail; 6310 } 6311 6312 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6313
AMDGPUOperand::ImmTyInterpSlot)); 6314 return MatchOperand_Success; 6315 } 6316 6317 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6318 StringRef Str; 6319 SMLoc S = getLoc(); 6320 6321 if (!parseId(Str)) 6322 return MatchOperand_NoMatch; 6323 6324 if (!Str.startswith("attr")) { 6325 Error(S, "invalid interpolation attribute"); 6326 return MatchOperand_ParseFail; 6327 } 6328 6329 StringRef Chan = Str.take_back(2); 6330 int AttrChan = StringSwitch<int>(Chan) 6331 .Case(".x", 0) 6332 .Case(".y", 1) 6333 .Case(".z", 2) 6334 .Case(".w", 3) 6335 .Default(-1); 6336 if (AttrChan == -1) { 6337 Error(S, "invalid or missing interpolation attribute channel"); 6338 return MatchOperand_ParseFail; 6339 } 6340 6341 Str = Str.drop_back(2).drop_front(4); 6342 6343 uint8_t Attr; 6344 if (Str.getAsInteger(10, Attr)) { 6345 Error(S, "invalid or missing interpolation attribute number"); 6346 return MatchOperand_ParseFail; 6347 } 6348 6349 if (Attr > 63) { 6350 Error(S, "out of bounds interpolation attribute number"); 6351 return MatchOperand_ParseFail; 6352 } 6353 6354 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6355 6356 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6357 AMDGPUOperand::ImmTyInterpAttr)); 6358 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6359 AMDGPUOperand::ImmTyAttrChan)); 6360 return MatchOperand_Success; 6361 } 6362 6363 //===----------------------------------------------------------------------===// 6364 // exp 6365 //===----------------------------------------------------------------------===// 6366 6367 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6368 using namespace llvm::AMDGPU::Exp; 6369 6370 StringRef Str; 6371 SMLoc S = getLoc(); 6372 6373 if (!parseId(Str)) 6374 return MatchOperand_NoMatch; 6375 6376 unsigned Id = getTgtId(Str); 6377 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6378 Error(S, (Id == ET_INVALID) ? 
6379 "invalid exp target" : 6380 "exp target is not supported on this GPU"); 6381 return MatchOperand_ParseFail; 6382 } 6383 6384 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6385 AMDGPUOperand::ImmTyExpTgt)); 6386 return MatchOperand_Success; 6387 } 6388 6389 //===----------------------------------------------------------------------===// 6390 // parser helpers 6391 //===----------------------------------------------------------------------===// 6392 6393 bool 6394 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6395 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6396 } 6397 6398 bool 6399 AMDGPUAsmParser::isId(const StringRef Id) const { 6400 return isId(getToken(), Id); 6401 } 6402 6403 bool 6404 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6405 return getTokenKind() == Kind; 6406 } 6407 6408 bool 6409 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6410 if (isId(Id)) { 6411 lex(); 6412 return true; 6413 } 6414 return false; 6415 } 6416 6417 bool 6418 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6419 if (isToken(AsmToken::Identifier)) { 6420 StringRef Tok = getTokenStr(); 6421 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6422 lex(); 6423 return true; 6424 } 6425 } 6426 return false; 6427 } 6428 6429 bool 6430 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6431 if (isId(Id) && peekToken().is(Kind)) { 6432 lex(); 6433 lex(); 6434 return true; 6435 } 6436 return false; 6437 } 6438 6439 bool 6440 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6441 if (isToken(Kind)) { 6442 lex(); 6443 return true; 6444 } 6445 return false; 6446 } 6447 6448 bool 6449 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6450 const StringRef ErrMsg) { 6451 if (!trySkipToken(Kind)) { 6452 Error(getLoc(), ErrMsg); 6453 return false; 6454 } 6455 return true; 6456 } 6457 6458 bool 6459 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6460 SMLoc S = getLoc(); 6461 6462 const MCExpr *Expr; 6463 if (Parser.parseExpression(Expr)) 6464 return false; 6465 6466 if (Expr->evaluateAsAbsolute(Imm)) 6467 return true; 6468 6469 if (Expected.empty()) { 6470 Error(S, "expected absolute expression"); 6471 } else { 6472 Error(S, Twine("expected ", Expected) + 6473 Twine(" or an absolute expression")); 6474 } 6475 return false; 6476 } 6477 6478 bool 6479 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6480 SMLoc S = getLoc(); 6481 6482 const MCExpr *Expr; 6483 if (Parser.parseExpression(Expr)) 6484 return false; 6485 6486 int64_t IntVal; 6487 if (Expr->evaluateAsAbsolute(IntVal)) { 6488 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6489 } else { 6490 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6491 } 6492 return true; 6493 } 6494 6495 bool 6496 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6497 if (isToken(AsmToken::String)) { 6498 Val = getToken().getStringContents(); 6499 lex(); 6500 return true; 6501 } else { 6502 Error(getLoc(), ErrMsg); 6503 return false; 6504 } 6505 } 6506 6507 bool 6508 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6509 if (isToken(AsmToken::Identifier)) { 6510 Val = getTokenStr(); 6511 lex(); 6512 return true; 6513 } else { 6514 if (!ErrMsg.empty()) 6515 Error(getLoc(), ErrMsg); 6516 return false; 6517 } 6518 } 6519 6520 AsmToken 6521 AMDGPUAsmParser::getToken() const { 6522 return Parser.getTok(); 6523 } 6524 6525 AsmToken 6526 
AMDGPUAsmParser::peekToken() { 6527 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6528 } 6529 6530 void 6531 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6532 auto TokCount = getLexer().peekTokens(Tokens); 6533 6534 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6535 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6536 } 6537 6538 AsmToken::TokenKind 6539 AMDGPUAsmParser::getTokenKind() const { 6540 return getLexer().getKind(); 6541 } 6542 6543 SMLoc 6544 AMDGPUAsmParser::getLoc() const { 6545 return getToken().getLoc(); 6546 } 6547 6548 StringRef 6549 AMDGPUAsmParser::getTokenStr() const { 6550 return getToken().getString(); 6551 } 6552 6553 void 6554 AMDGPUAsmParser::lex() { 6555 Parser.Lex(); 6556 } 6557 6558 SMLoc 6559 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6560 const OperandVector &Operands) const { 6561 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6562 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6563 if (Test(Op)) 6564 return Op.getStartLoc(); 6565 } 6566 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6567 } 6568 6569 SMLoc 6570 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6571 const OperandVector &Operands) const { 6572 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6573 return getOperandLoc(Test, Operands); 6574 } 6575 6576 SMLoc 6577 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6578 const OperandVector &Operands) const { 6579 auto Test = [=](const AMDGPUOperand& Op) { 6580 return Op.isRegKind() && Op.getReg() == Reg; 6581 }; 6582 return getOperandLoc(Test, Operands); 6583 } 6584 6585 SMLoc 6586 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6587 auto Test = [](const AMDGPUOperand& Op) { 6588 return Op.IsImmKindLiteral() || Op.isExpr(); 6589 }; 6590 return getOperandLoc(Test, Operands); 6591 } 6592 6593 SMLoc 6594 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6595 auto Test = [](const AMDGPUOperand& Op) { 6596 return Op.isImmKindConst(); 6597 }; 6598 return getOperandLoc(Test, Operands); 6599 } 6600 6601 //===----------------------------------------------------------------------===// 6602 // swizzle 6603 //===----------------------------------------------------------------------===// 6604 6605 LLVM_READNONE 6606 static unsigned 6607 encodeBitmaskPerm(const unsigned AndMask, 6608 const unsigned OrMask, 6609 const unsigned XorMask) { 6610 using namespace llvm::AMDGPU::Swizzle; 6611 6612 return BITMASK_PERM_ENC | 6613 (AndMask << BITMASK_AND_SHIFT) | 6614 (OrMask << BITMASK_OR_SHIFT) | 6615 (XorMask << BITMASK_XOR_SHIFT); 6616 } 6617 6618 bool 6619 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6620 const unsigned MinVal, 6621 const unsigned MaxVal, 6622 const StringRef ErrMsg, 6623 SMLoc &Loc) { 6624 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6625 return false; 6626 } 6627 Loc = getLoc(); 6628 if (!parseExpr(Op)) { 6629 return false; 6630 } 6631 if (Op < MinVal || Op > MaxVal) { 6632 Error(Loc, ErrMsg); 6633 return false; 6634 } 6635 6636 return true; 6637 } 6638 6639 bool 6640 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6641 const unsigned MinVal, 6642 const unsigned MaxVal, 6643 const StringRef ErrMsg) { 6644 SMLoc Loc; 6645 for (unsigned i = 0; i < OpNum; ++i) { 6646 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6647 return false; 6648 } 6649 6650 return true; 6651 } 6652 6653 bool 6654 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6655 using namespace llvm::AMDGPU::Swizzle; 6656 6657 int64_t Lane[LANE_NUM]; 6658 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6659 "expected a 2-bit lane id")) { 6660 Imm = QUAD_PERM_ENC; 6661 for (unsigned I = 0; I < LANE_NUM; ++I) { 6662 Imm |= Lane[I] << (LANE_SHIFT * I); 6663 } 6664 return true; 6665 } 6666 return false; 6667 } 6668 6669 bool 6670 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6671 using namespace llvm::AMDGPU::Swizzle; 6672 6673 SMLoc Loc; 6674 int64_t GroupSize; 6675 int64_t LaneIdx; 6676 6677 if (!parseSwizzleOperand(GroupSize, 6678 2, 32, 6679 "group size must be in the interval [2,32]", 6680 Loc)) { 6681 return false; 6682 } 6683 if (!isPowerOf2_64(GroupSize)) { 6684 Error(Loc, "group size must be a power of two"); 6685 return false; 6686 } 6687 if (parseSwizzleOperand(LaneIdx, 6688 0, GroupSize - 1, 6689 "lane id must be in the interval [0,group size - 1]", 6690 Loc)) { 6691 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6692 return true; 6693 } 6694 return false; 6695 } 6696 6697 bool 6698 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6699 using namespace llvm::AMDGPU::Swizzle; 6700 6701 SMLoc Loc; 6702 int64_t GroupSize; 6703 6704 if (!parseSwizzleOperand(GroupSize, 6705 2, 32, 6706 "group size must be in the interval [2,32]", 6707 Loc)) { 6708 return false; 6709 } 6710 if (!isPowerOf2_64(GroupSize)) { 6711 Error(Loc, "group size must be a power of two"); 6712 return false; 6713 } 6714 6715 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6716 return true; 6717 } 6718 6719 bool 6720 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6721 using namespace llvm::AMDGPU::Swizzle; 6722 6723 SMLoc Loc; 6724 int64_t GroupSize; 6725 6726 if (!parseSwizzleOperand(GroupSize, 6727 1, 16, 6728 "group size must be in the interval [1,16]", 6729 Loc)) { 6730 return false; 6731 } 6732 if (!isPowerOf2_64(GroupSize)) { 6733 Error(Loc, "group size must be a power of two"); 6734 return false; 6735 } 6736 6737 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6738 return true; 6739 } 6740 6741 bool 6742 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6743 using namespace llvm::AMDGPU::Swizzle; 6744 6745 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6746 return false; 6747 } 6748 6749 StringRef Ctl; 6750 SMLoc StrLoc = getLoc(); 6751 if (!parseString(Ctl)) { 6752 return false; 6753 } 6754 if (Ctl.size() != BITMASK_WIDTH) { 6755 Error(StrLoc, "expected a 5-character mask"); 6756 return false; 6757 } 6758 6759 unsigned AndMask = 0; 6760 unsigned OrMask = 0; 6761 unsigned XorMask = 0; 6762 6763 for (size_t i = 0; i < Ctl.size(); ++i) { 6764 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6765 switch(Ctl[i]) { 6766 default: 6767 Error(StrLoc, "invalid mask"); 6768 return false; 6769 case '0': 6770 break; 6771 case '1': 6772 OrMask |= Mask; 6773 break; 6774 case 'p': 6775 AndMask |= Mask; 6776 break; 6777 case 'i': 6778 AndMask |= Mask; 6779 XorMask |= Mask; 6780 break; 6781 } 6782 } 6783 6784 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6785 return true; 6786 } 6787 6788 bool 6789 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6790 6791 SMLoc OffsetLoc = getLoc(); 6792 6793 if (!parseExpr(Imm, "a swizzle macro")) { 6794 return false; 6795 } 6796 if (!isUInt<16>(Imm)) { 6797 Error(OffsetLoc, "expected a 16-bit offset"); 6798 return false; 6799 } 6800 return true; 6801 } 6802 6803 bool 6804 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6805 using namespace llvm::AMDGPU::Swizzle; 6806 6807 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6808 6809 SMLoc ModeLoc = getLoc(); 6810 bool Ok = false; 6811 6812 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6813 Ok = parseSwizzleQuadPerm(Imm); 6814 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6815 Ok = parseSwizzleBitmaskPerm(Imm); 6816 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6817 Ok = parseSwizzleBroadcast(Imm); 6818 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6819 Ok = parseSwizzleSwap(Imm); 6820 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6821 Ok = parseSwizzleReverse(Imm); 6822 } else { 6823 Error(ModeLoc, "expected a swizzle mode"); 6824 } 6825 6826 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6827 } 6828 6829 return false; 6830 } 6831 6832 OperandMatchResultTy 6833 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6834 SMLoc S = getLoc(); 6835 int64_t Imm = 0; 6836 6837 if (trySkipId("offset")) { 6838 6839 bool Ok = false; 6840 if (skipToken(AsmToken::Colon, "expected a colon")) { 6841 if (trySkipId("swizzle")) { 6842 Ok = parseSwizzleMacro(Imm); 6843 } else { 6844 Ok = parseSwizzleOffset(Imm); 6845 } 6846 } 6847 6848 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6849 6850 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6851 } else { 6852 // Swizzle "offset" operand is optional. 6853 // If it is omitted, try parsing other optional operands. 6854 return parseOptionalOpr(Operands); 6855 } 6856 } 6857 6858 bool 6859 AMDGPUOperand::isSwizzle() const { 6860 return isImmTy(ImmTySwizzle); 6861 } 6862 6863 //===----------------------------------------------------------------------===// 6864 // VGPR Index Mode 6865 //===----------------------------------------------------------------------===// 6866 6867 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6868 6869 using namespace llvm::AMDGPU::VGPRIndexMode; 6870 6871 if (trySkipToken(AsmToken::RParen)) { 6872 return OFF; 6873 } 6874 6875 int64_t Imm = 0; 6876 6877 while (true) { 6878 unsigned Mode = 0; 6879 SMLoc S = getLoc(); 6880 6881 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6882 if (trySkipId(IdSymbolic[ModeId])) { 6883 Mode = 1 << ModeId; 6884 break; 6885 } 6886 } 6887 6888 if (Mode == 0) { 6889 Error(S, (Imm == 0)?
6890 "expected a VGPR index mode or a closing parenthesis" : 6891 "expected a VGPR index mode"); 6892 return UNDEF; 6893 } 6894 6895 if (Imm & Mode) { 6896 Error(S, "duplicate VGPR index mode"); 6897 return UNDEF; 6898 } 6899 Imm |= Mode; 6900 6901 if (trySkipToken(AsmToken::RParen)) 6902 break; 6903 if (!skipToken(AsmToken::Comma, 6904 "expected a comma or a closing parenthesis")) 6905 return UNDEF; 6906 } 6907 6908 return Imm; 6909 } 6910 6911 OperandMatchResultTy 6912 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6913 6914 using namespace llvm::AMDGPU::VGPRIndexMode; 6915 6916 int64_t Imm = 0; 6917 SMLoc S = getLoc(); 6918 6919 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6920 Imm = parseGPRIdxMacro(); 6921 if (Imm == UNDEF) 6922 return MatchOperand_ParseFail; 6923 } else { 6924 if (getParser().parseAbsoluteExpression(Imm)) 6925 return MatchOperand_ParseFail; 6926 if (Imm < 0 || !isUInt<4>(Imm)) { 6927 Error(S, "invalid immediate: only 4-bit values are legal"); 6928 return MatchOperand_ParseFail; 6929 } 6930 } 6931 6932 Operands.push_back( 6933 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6934 return MatchOperand_Success; 6935 } 6936 6937 bool AMDGPUOperand::isGPRIdxMode() const { 6938 return isImmTy(ImmTyGprIdxMode); 6939 } 6940 6941 //===----------------------------------------------------------------------===// 6942 // sopp branch targets 6943 //===----------------------------------------------------------------------===// 6944 6945 OperandMatchResultTy 6946 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6947 6948 // Make sure we are not parsing something 6949 // that looks like a label or an expression but is not. 6950 // This will improve error messages. 6951 if (isRegister() || isModifier()) 6952 return MatchOperand_NoMatch; 6953 6954 if (!parseExpr(Operands)) 6955 return MatchOperand_ParseFail; 6956 6957 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6958 assert(Opr.isImm() || Opr.isExpr()); 6959 SMLoc Loc = Opr.getStartLoc(); 6960 6961 // Currently we do not support arbitrary expressions as branch targets. 6962 // Only labels and absolute expressions are accepted. 
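// For example (illustrative): "s_branch target_label" and "s_branch 4" are
// accepted, while a relocatable expression such as "target_label+4" is
// rejected by the checks below.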
6963 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6964 Error(Loc, "expected an absolute expression or a label"); 6965 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6966 Error(Loc, "expected a 16-bit signed jump offset"); 6967 } 6968 6969 return MatchOperand_Success; 6970 } 6971 6972 //===----------------------------------------------------------------------===// 6973 // Boolean holding registers 6974 //===----------------------------------------------------------------------===// 6975 6976 OperandMatchResultTy 6977 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6978 return parseReg(Operands); 6979 } 6980 6981 //===----------------------------------------------------------------------===// 6982 // mubuf 6983 //===----------------------------------------------------------------------===// 6984 6985 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6986 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6987 } 6988 6989 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6990 const OperandVector &Operands, 6991 bool IsAtomic, 6992 bool IsLds) { 6993 bool IsLdsOpcode = IsLds; 6994 bool HasLdsModifier = false; 6995 OptionalImmIndexMap OptionalIdx; 6996 unsigned FirstOperandIdx = 1; 6997 bool IsAtomicReturn = false; 6998 6999 if (IsAtomic) { 7000 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7001 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7002 if (!Op.isCPol()) 7003 continue; 7004 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7005 break; 7006 } 7007 7008 if (!IsAtomicReturn) { 7009 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7010 if (NewOpc != -1) 7011 Inst.setOpcode(NewOpc); 7012 } 7013 7014 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7015 SIInstrFlags::IsAtomicRet; 7016 } 7017 7018 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7019 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7020 7021 // Add the register arguments 7022 if (Op.isReg()) { 7023 Op.addRegOperands(Inst, 1); 7024 // Insert a tied src for atomic return dst. 7025 // This cannot be postponed as subsequent calls to 7026 // addImmOperands rely on correct number of MC operands. 7027 if (IsAtomicReturn && i == FirstOperandIdx) 7028 Op.addRegOperands(Inst, 1); 7029 continue; 7030 } 7031 7032 // Handle the case where soffset is an immediate 7033 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7034 Op.addImmOperands(Inst, 1); 7035 continue; 7036 } 7037 7038 HasLdsModifier |= Op.isLDS(); 7039 7040 // Handle tokens like 'offen' which are sometimes hard-coded into the 7041 // asm string. There are no MCInst operands for these. 7042 if (Op.isToken()) { 7043 continue; 7044 } 7045 assert(Op.isImm()); 7046 7047 // Handle optional arguments 7048 OptionalIdx[Op.getImmTy()] = i; 7049 } 7050 7051 // This is a workaround for an llvm quirk which may result in an 7052 // incorrect instruction selection. Lds and non-lds versions of 7053 // MUBUF instructions are identical except that lds versions 7054 // have mandatory 'lds' modifier. However this modifier follows 7055 // optional modifiers and llvm asm matcher regards this 'lds' 7056 // modifier as an optional one. As a result, an lds version 7057 // of opcode may be selected even if it has no 'lds' modifier. 7058 if (IsLdsOpcode && !HasLdsModifier) { 7059 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7060 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7061 Inst.setOpcode(NoLdsOpcode); 7062 IsLdsOpcode = false; 7063 } 7064 } 7065 7066 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7068 7069 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7071 } 7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7073 } 7074 7075 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7076 OptionalImmIndexMap OptionalIdx; 7077 7078 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7079 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7080 7081 // Add the register arguments 7082 if (Op.isReg()) { 7083 Op.addRegOperands(Inst, 1); 7084 continue; 7085 } 7086 7087 // Handle the case where soffset is an immediate 7088 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7089 Op.addImmOperands(Inst, 1); 7090 continue; 7091 } 7092 7093 // Handle tokens like 'offen' which are sometimes hard-coded into the 7094 // asm string. There are no MCInst operands for these. 7095 if (Op.isToken()) { 7096 continue; 7097 } 7098 assert(Op.isImm()); 7099 7100 // Handle optional arguments 7101 OptionalIdx[Op.getImmTy()] = i; 7102 } 7103 7104 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7105 AMDGPUOperand::ImmTyOffset); 7106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7107 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7108 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7110 } 7111 7112 //===----------------------------------------------------------------------===// 7113 // mimg 7114 //===----------------------------------------------------------------------===// 7115 7116 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7117 bool IsAtomic) { 7118 unsigned I = 1; 7119 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7120 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7121 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7122 } 7123 7124 if (IsAtomic) { 7125 // Add src, same as dst 7126 assert(Desc.getNumDefs() == 1); 7127 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7128 } 7129 7130 OptionalImmIndexMap OptionalIdx; 7131 7132 for (unsigned E = Operands.size(); I != E; ++I) { 7133 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7134 7135 // Add the register arguments 7136 if (Op.isReg()) { 7137 Op.addRegOperands(Inst, 1); 7138 } else if (Op.isImmModifier()) { 7139 OptionalIdx[Op.getImmTy()] = I; 7140 } else if (!Op.isToken()) { 7141 llvm_unreachable("unexpected operand type"); 7142 } 7143 } 7144 7145 bool IsGFX10Plus = isGFX10Plus(); 7146 7147 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7148 if (IsGFX10Plus) 7149 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7150 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7152 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7153 if (IsGFX10Plus) 7154 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7155 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7158 if (!IsGFX10Plus) 7159 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7161 } 7162 7163 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7164 cvtMIMG(Inst, Operands, true); 7165 } 7166 7167 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7168 OptionalImmIndexMap OptionalIdx; 7169 bool IsAtomicReturn = false; 7170 7171 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7172 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7173 if (!Op.isCPol()) 7174 continue; 7175 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7176 break; 7177 } 7178 7179 if (!IsAtomicReturn) { 7180 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7181 if (NewOpc != -1) 7182 Inst.setOpcode(NewOpc); 7183 } 7184 7185 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7186 SIInstrFlags::IsAtomicRet; 7187 7188 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7189 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7190 7191 // Add the register arguments 7192 if (Op.isReg()) { 7193 Op.addRegOperands(Inst, 1); 7194 if (IsAtomicReturn && i == 1) 7195 Op.addRegOperands(Inst, 1); 7196 continue; 7197 } 7198 7199 // Handle the case where soffset is an immediate 7200 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7201 Op.addImmOperands(Inst, 1); 7202 continue; 7203 } 7204 7205 // Handle tokens like 'offen' which are sometimes hard-coded into the 7206 // asm string. There are no MCInst operands for these. 7207 if (Op.isToken()) { 7208 continue; 7209 } 7210 assert(Op.isImm()); 7211 7212 // Handle optional arguments 7213 OptionalIdx[Op.getImmTy()] = i; 7214 } 7215 7216 if ((int)Inst.getNumOperands() <= 7217 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7220 } 7221 7222 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7223 const OperandVector &Operands) { 7224 for (unsigned I = 1; I < Operands.size(); ++I) { 7225 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7226 if (Operand.isReg()) 7227 Operand.addRegOperands(Inst, 1); 7228 } 7229 7230 Inst.addOperand(MCOperand::createImm(1)); // a16 7231 } 7232 7233 //===----------------------------------------------------------------------===// 7234 // smrd 7235 //===----------------------------------------------------------------------===// 7236 7237 bool AMDGPUOperand::isSMRDOffset8() const { 7238 return isImm() && isUInt<8>(getImm()); 7239 } 7240 7241 bool AMDGPUOperand::isSMEMOffset() const { 7242 return isImm(); // Offset range is checked later by validator. 7243 } 7244 7245 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7246 // 32-bit literals are only supported on CI and we only want to use them 7247 // when the offset is > 8-bits. 
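// For example (illustrative), an offset of 0x100 does not fit in 8 bits and
// is therefore treated as a literal offset by the check below.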
7248 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7249 } 7250 7251 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7252 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7253 } 7254 7255 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7256 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7257 } 7258 7259 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7260 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7261 } 7262 7263 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7264 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7265 } 7266 7267 //===----------------------------------------------------------------------===// 7268 // vop3 7269 //===----------------------------------------------------------------------===// 7270 7271 static bool ConvertOmodMul(int64_t &Mul) { 7272 if (Mul != 1 && Mul != 2 && Mul != 4) 7273 return false; 7274 7275 Mul >>= 1; 7276 return true; 7277 } 7278 7279 static bool ConvertOmodDiv(int64_t &Div) { 7280 if (Div == 1) { 7281 Div = 0; 7282 return true; 7283 } 7284 7285 if (Div == 2) { 7286 Div = 3; 7287 return true; 7288 } 7289 7290 return false; 7291 } 7292 7293 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7294 // This is intentional and ensures compatibility with sp3. 7295 // See bug 35397 for details. 7296 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7297 if (BoundCtrl == 0 || BoundCtrl == 1) { 7298 BoundCtrl = 1; 7299 return true; 7300 } 7301 return false; 7302 } 7303 7304 // Note: the order in this table matches the order of operands in AsmString. 7305 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7306 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7307 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7308 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7309 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7310 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7311 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7312 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7313 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7314 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7315 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7316 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7317 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7318 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7319 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7320 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7321 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7322 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7323 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7324 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7325 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7326 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7327 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7328 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7329 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7330 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7331 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7332 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7333 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7334 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7335 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7336 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7337 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7338 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr}, 7339 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7340 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7341 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7342 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7343 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7344 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7345 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7346 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7347 }; 7348 7349 void AMDGPUAsmParser::onBeginOfFile() { 7350 if (!getParser().getStreamer().getTargetStreamer() || 7351 getSTI().getTargetTriple().getArch() == Triple::r600) 7352 return; 7353 7354 if (!getTargetStreamer().getTargetID()) 7355 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7356 7357 if (isHsaAbiVersion3Or4(&getSTI())) 7358 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7359 } 7360 7361 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7362 7363 OperandMatchResultTy res = parseOptionalOpr(Operands); 7364 7365 // This is a hack to enable hardcoded mandatory operands which follow 7366 // optional operands. 7367 // 7368 // The current design assumes that all operands after the first optional 7369 // operand are also optional. However, the implementation of some instructions 7370 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands). 7371 // 7372 // To alleviate this problem, we have to (implicitly) parse extra operands 7373 // to make sure the autogenerated parser of custom operands never hits 7374 // hardcoded mandatory operands.
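  // NOTE (illustrative sketch added for clarity; the exact mnemonic and operand order are an assumption, not taken from this file): for a returning atomic such as "global_atomic_add v0, v[1:2], v3, off glc" the trailing 'glc' is a hardcoded mandatory token. The lookahead below therefore parses up to MAX_OPR_LOOKAHEAD additional optional operands, skipping the commas between them, so the autogenerated custom-operand parser is never left positioned at such a mandatory token.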
7375 7376 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7377 if (res != MatchOperand_Success || 7378 isToken(AsmToken::EndOfStatement)) 7379 break; 7380 7381 trySkipToken(AsmToken::Comma); 7382 res = parseOptionalOpr(Operands); 7383 } 7384 7385 return res; 7386 } 7387 7388 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7389 OperandMatchResultTy res; 7390 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7391 // try to parse any optional operand here 7392 if (Op.IsBit) { 7393 res = parseNamedBit(Op.Name, Operands, Op.Type); 7394 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7395 res = parseOModOperand(Operands); 7396 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7397 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7398 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7399 res = parseSDWASel(Operands, Op.Name, Op.Type); 7400 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7401 res = parseSDWADstUnused(Operands); 7402 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7403 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7404 Op.Type == AMDGPUOperand::ImmTyNegLo || 7405 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7406 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7407 Op.ConvertResult); 7408 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7409 res = parseDim(Operands); 7410 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7411 res = parseCPol(Operands); 7412 } else { 7413 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7414 } 7415 if (res != MatchOperand_NoMatch) { 7416 return res; 7417 } 7418 } 7419 return MatchOperand_NoMatch; 7420 } 7421 7422 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7423 StringRef Name = getTokenStr(); 7424 if (Name == "mul") { 7425 return parseIntWithPrefix("mul", Operands, 7426 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7427 } 7428 7429 if (Name == "div") { 7430 return parseIntWithPrefix("div", Operands, 7431 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7432 } 7433 7434 return MatchOperand_NoMatch; 7435 } 7436 7437 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7438 cvtVOP3P(Inst, Operands); 7439 7440 int Opc = Inst.getOpcode(); 7441 7442 int SrcNum; 7443 const int Ops[] = { AMDGPU::OpName::src0, 7444 AMDGPU::OpName::src1, 7445 AMDGPU::OpName::src2 }; 7446 for (SrcNum = 0; 7447 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7448 ++SrcNum); 7449 assert(SrcNum > 0); 7450 7451 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7452 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7453 7454 if ((OpSel & (1 << SrcNum)) != 0) { 7455 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7456 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7457 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7458 } 7459 } 7460 7461 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7462 // 1. This operand is input modifiers 7463 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7464 // 2. This is not last operand 7465 && Desc.NumOperands > (OpNum + 1) 7466 // 3. Next operand is register class 7467 && Desc.OpInfo[OpNum + 1].RegClass != -1 7468 // 4. 
Next register is not tied to any other operand 7469 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7470 } 7471 7472 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7473 { 7474 OptionalImmIndexMap OptionalIdx; 7475 unsigned Opc = Inst.getOpcode(); 7476 7477 unsigned I = 1; 7478 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7479 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7480 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7481 } 7482 7483 for (unsigned E = Operands.size(); I != E; ++I) { 7484 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7485 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7486 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7487 } else if (Op.isInterpSlot() || 7488 Op.isInterpAttr() || 7489 Op.isAttrChan()) { 7490 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7491 } else if (Op.isImmModifier()) { 7492 OptionalIdx[Op.getImmTy()] = I; 7493 } else { 7494 llvm_unreachable("unhandled operand type"); 7495 } 7496 } 7497 7498 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7500 } 7501 7502 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7503 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7504 } 7505 7506 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7507 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7508 } 7509 } 7510 7511 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7512 OptionalImmIndexMap &OptionalIdx) { 7513 unsigned Opc = Inst.getOpcode(); 7514 7515 unsigned I = 1; 7516 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7517 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7518 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7519 } 7520 7521 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7522 // This instruction has src modifiers 7523 for (unsigned E = Operands.size(); I != E; ++I) { 7524 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7525 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7526 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7527 } else if (Op.isImmModifier()) { 7528 OptionalIdx[Op.getImmTy()] = I; 7529 } else if (Op.isRegOrImm()) { 7530 Op.addRegOrImmOperands(Inst, 1); 7531 } else { 7532 llvm_unreachable("unhandled operand type"); 7533 } 7534 } 7535 } else { 7536 // No src modifiers 7537 for (unsigned E = Operands.size(); I != E; ++I) { 7538 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7539 if (Op.isMod()) { 7540 OptionalIdx[Op.getImmTy()] = I; 7541 } else { 7542 Op.addRegOrImmOperands(Inst, 1); 7543 } 7544 } 7545 } 7546 7547 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7548 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7549 } 7550 7551 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7552 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7553 } 7554 7555 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7556 // it has src2 register operand that is tied to dst operand 7557 // we don't allow modifiers for this operand in assembler so src2_modifiers 7558 // should be 0. 
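  // NOTE (illustrative example added for clarity; the syntax is an assumption): for "v_mac_f32_e64 v0, v1, v2" only dst, src0 and src1 are parsed from the source, so the code below inserts src2_modifiers = 0 and then a copy of the dst register (v0) as the tied src2 operand.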
7559 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7560 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7561 Opc == AMDGPU::V_MAC_F32_e64_vi || 7562 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7563 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7564 Opc == AMDGPU::V_MAC_F16_e64_vi || 7565 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7566 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7567 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7568 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7569 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7570 auto it = Inst.begin(); 7571 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7572 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7573 ++it; 7574 // Copy the operand to ensure it's not invalidated when Inst grows. 7575 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7576 } 7577 } 7578 7579 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7580 OptionalImmIndexMap OptionalIdx; 7581 cvtVOP3(Inst, Operands, OptionalIdx); 7582 } 7583 7584 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7585 OptionalImmIndexMap &OptIdx) { 7586 const int Opc = Inst.getOpcode(); 7587 const MCInstrDesc &Desc = MII.get(Opc); 7588 7589 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7590 7591 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7592 assert(!IsPacked); 7593 Inst.addOperand(Inst.getOperand(0)); 7594 } 7595 7596 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7597 // instruction, and then figure out where to actually put the modifiers 7598 7599 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7600 if (OpSelIdx != -1) { 7601 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7602 } 7603 7604 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7605 if (OpSelHiIdx != -1) { 7606 int DefaultVal = IsPacked ? 
-1 : 0; 7607 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7608 DefaultVal); 7609 } 7610 7611 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7612 if (NegLoIdx != -1) { 7613 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7614 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7615 } 7616 7617 const int Ops[] = { AMDGPU::OpName::src0, 7618 AMDGPU::OpName::src1, 7619 AMDGPU::OpName::src2 }; 7620 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7621 AMDGPU::OpName::src1_modifiers, 7622 AMDGPU::OpName::src2_modifiers }; 7623 7624 unsigned OpSel = 0; 7625 unsigned OpSelHi = 0; 7626 unsigned NegLo = 0; 7627 unsigned NegHi = 0; 7628 7629 if (OpSelIdx != -1) 7630 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7631 7632 if (OpSelHiIdx != -1) 7633 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7634 7635 if (NegLoIdx != -1) { 7636 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7637 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7638 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7639 } 7640 7641 for (int J = 0; J < 3; ++J) { 7642 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7643 if (OpIdx == -1) 7644 break; 7645 7646 uint32_t ModVal = 0; 7647 7648 if ((OpSel & (1 << J)) != 0) 7649 ModVal |= SISrcMods::OP_SEL_0; 7650 7651 if ((OpSelHi & (1 << J)) != 0) 7652 ModVal |= SISrcMods::OP_SEL_1; 7653 7654 if ((NegLo & (1 << J)) != 0) 7655 ModVal |= SISrcMods::NEG; 7656 7657 if ((NegHi & (1 << J)) != 0) 7658 ModVal |= SISrcMods::NEG_HI; 7659 7660 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7661 7662 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7663 } 7664 } 7665 7666 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7667 OptionalImmIndexMap OptIdx; 7668 cvtVOP3(Inst, Operands, OptIdx); 7669 cvtVOP3P(Inst, Operands, OptIdx); 7670 } 7671 7672 //===----------------------------------------------------------------------===// 7673 // dpp 7674 //===----------------------------------------------------------------------===// 7675 7676 bool AMDGPUOperand::isDPP8() const { 7677 return isImmTy(ImmTyDPP8); 7678 } 7679 7680 bool AMDGPUOperand::isDPPCtrl() const { 7681 using namespace AMDGPU::DPP; 7682 7683 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7684 if (result) { 7685 int64_t Imm = getImm(); 7686 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7687 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7688 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7689 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7690 (Imm == DppCtrl::WAVE_SHL1) || 7691 (Imm == DppCtrl::WAVE_ROL1) || 7692 (Imm == DppCtrl::WAVE_SHR1) || 7693 (Imm == DppCtrl::WAVE_ROR1) || 7694 (Imm == DppCtrl::ROW_MIRROR) || 7695 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7696 (Imm == DppCtrl::BCAST15) || 7697 (Imm == DppCtrl::BCAST31) || 7698 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7699 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7700 } 7701 return false; 7702 } 7703 7704 //===----------------------------------------------------------------------===// 7705 // mAI 7706 //===----------------------------------------------------------------------===// 7707 7708 bool AMDGPUOperand::isBLGP() const { 7709 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7710 } 7711 7712 bool 
AMDGPUOperand::isCBSZ() const { 7713 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7714 } 7715 7716 bool AMDGPUOperand::isABID() const { 7717 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7718 } 7719 7720 bool AMDGPUOperand::isS16Imm() const { 7721 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7722 } 7723 7724 bool AMDGPUOperand::isU16Imm() const { 7725 return isImm() && isUInt<16>(getImm()); 7726 } 7727 7728 //===----------------------------------------------------------------------===// 7729 // dim 7730 //===----------------------------------------------------------------------===// 7731 7732 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7733 // We want to allow "dim:1D" etc., 7734 // but the initial 1 is tokenized as an integer. 7735 std::string Token; 7736 if (isToken(AsmToken::Integer)) { 7737 SMLoc Loc = getToken().getEndLoc(); 7738 Token = std::string(getTokenStr()); 7739 lex(); 7740 if (getLoc() != Loc) 7741 return false; 7742 } 7743 7744 StringRef Suffix; 7745 if (!parseId(Suffix)) 7746 return false; 7747 Token += Suffix; 7748 7749 StringRef DimId = Token; 7750 if (DimId.startswith("SQ_RSRC_IMG_")) 7751 DimId = DimId.drop_front(12); 7752 7753 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7754 if (!DimInfo) 7755 return false; 7756 7757 Encoding = DimInfo->Encoding; 7758 return true; 7759 } 7760 7761 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7762 if (!isGFX10Plus()) 7763 return MatchOperand_NoMatch; 7764 7765 SMLoc S = getLoc(); 7766 7767 if (!trySkipId("dim", AsmToken::Colon)) 7768 return MatchOperand_NoMatch; 7769 7770 unsigned Encoding; 7771 SMLoc Loc = getLoc(); 7772 if (!parseDimId(Encoding)) { 7773 Error(Loc, "invalid dim value"); 7774 return MatchOperand_ParseFail; 7775 } 7776 7777 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7778 AMDGPUOperand::ImmTyDim)); 7779 return MatchOperand_Success; 7780 } 7781 7782 //===----------------------------------------------------------------------===// 7783 // dpp 7784 //===----------------------------------------------------------------------===// 7785 7786 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7787 SMLoc S = getLoc(); 7788 7789 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7790 return MatchOperand_NoMatch; 7791 7792 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7793 7794 int64_t Sels[8]; 7795 7796 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7797 return MatchOperand_ParseFail; 7798 7799 for (size_t i = 0; i < 8; ++i) { 7800 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7801 return MatchOperand_ParseFail; 7802 7803 SMLoc Loc = getLoc(); 7804 if (getParser().parseAbsoluteExpression(Sels[i])) 7805 return MatchOperand_ParseFail; 7806 if (0 > Sels[i] || 7 < Sels[i]) { 7807 Error(Loc, "expected a 3-bit value"); 7808 return MatchOperand_ParseFail; 7809 } 7810 } 7811 7812 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7813 return MatchOperand_ParseFail; 7814 7815 unsigned DPP8 = 0; 7816 for (size_t i = 0; i < 8; ++i) 7817 DPP8 |= (Sels[i] << (i * 3)); 7818 7819 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7820 return MatchOperand_Success; 7821 } 7822 7823 bool 7824 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7825 const OperandVector &Operands) { 7826 if (Ctrl == "row_newbcast") 7827 return isGFX90A(); 7828 7829 if (Ctrl == "row_share" || 7830 Ctrl 
== "row_xmask") 7831 return isGFX10Plus(); 7832 7833 if (Ctrl == "wave_shl" || 7834 Ctrl == "wave_shr" || 7835 Ctrl == "wave_rol" || 7836 Ctrl == "wave_ror" || 7837 Ctrl == "row_bcast") 7838 return isVI() || isGFX9(); 7839 7840 return Ctrl == "row_mirror" || 7841 Ctrl == "row_half_mirror" || 7842 Ctrl == "quad_perm" || 7843 Ctrl == "row_shl" || 7844 Ctrl == "row_shr" || 7845 Ctrl == "row_ror"; 7846 } 7847 7848 int64_t 7849 AMDGPUAsmParser::parseDPPCtrlPerm() { 7850 // quad_perm:[%d,%d,%d,%d] 7851 7852 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7853 return -1; 7854 7855 int64_t Val = 0; 7856 for (int i = 0; i < 4; ++i) { 7857 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7858 return -1; 7859 7860 int64_t Temp; 7861 SMLoc Loc = getLoc(); 7862 if (getParser().parseAbsoluteExpression(Temp)) 7863 return -1; 7864 if (Temp < 0 || Temp > 3) { 7865 Error(Loc, "expected a 2-bit value"); 7866 return -1; 7867 } 7868 7869 Val += (Temp << i * 2); 7870 } 7871 7872 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7873 return -1; 7874 7875 return Val; 7876 } 7877 7878 int64_t 7879 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7880 using namespace AMDGPU::DPP; 7881 7882 // sel:%d 7883 7884 int64_t Val; 7885 SMLoc Loc = getLoc(); 7886 7887 if (getParser().parseAbsoluteExpression(Val)) 7888 return -1; 7889 7890 struct DppCtrlCheck { 7891 int64_t Ctrl; 7892 int Lo; 7893 int Hi; 7894 }; 7895 7896 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7897 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7898 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7899 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7900 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7901 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7902 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7903 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7904 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7905 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7906 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7907 .Default({-1, 0, 0}); 7908 7909 bool Valid; 7910 if (Check.Ctrl == -1) { 7911 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7912 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7913 } else { 7914 Valid = Check.Lo <= Val && Val <= Check.Hi; 7915 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7916 } 7917 7918 if (!Valid) { 7919 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7920 return -1; 7921 } 7922 7923 return Val; 7924 } 7925 7926 OperandMatchResultTy 7927 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7928 using namespace AMDGPU::DPP; 7929 7930 if (!isToken(AsmToken::Identifier) || 7931 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7932 return MatchOperand_NoMatch; 7933 7934 SMLoc S = getLoc(); 7935 int64_t Val = -1; 7936 StringRef Ctrl; 7937 7938 parseId(Ctrl); 7939 7940 if (Ctrl == "row_mirror") { 7941 Val = DppCtrl::ROW_MIRROR; 7942 } else if (Ctrl == "row_half_mirror") { 7943 Val = DppCtrl::ROW_HALF_MIRROR; 7944 } else { 7945 if (skipToken(AsmToken::Colon, "expected a colon")) { 7946 if (Ctrl == "quad_perm") { 7947 Val = parseDPPCtrlPerm(); 7948 } else { 7949 Val = parseDPPCtrlSel(Ctrl); 7950 } 7951 } 7952 } 7953 7954 if (Val == -1) 7955 return MatchOperand_ParseFail; 7956 7957 Operands.push_back( 7958 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7959 return MatchOperand_Success; 7960 } 7961 7962 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7963 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7964 } 7965 7966 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7967 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7968 } 7969 7970 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7971 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7972 } 7973 7974 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7975 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7976 } 7977 7978 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7979 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7980 } 7981 7982 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7983 OptionalImmIndexMap OptionalIdx; 7984 7985 unsigned Opc = Inst.getOpcode(); 7986 bool HasModifiers = 7987 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 7988 unsigned I = 1; 7989 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7990 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7991 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7992 } 7993 7994 int Fi = 0; 7995 for (unsigned E = Operands.size(); I != E; ++I) { 7996 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7997 MCOI::TIED_TO); 7998 if (TiedTo != -1) { 7999 assert((unsigned)TiedTo < Inst.getNumOperands()); 8000 // handle tied old or src2 for MAC instructions 8001 Inst.addOperand(Inst.getOperand(TiedTo)); 8002 } 8003 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8004 // Add the register arguments 8005 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8006 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8007 // Skip it. 
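  // NOTE (illustrative example added for clarity; the syntax is an assumption): in "v_add_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf" the written 'vcc' names an implicitly encoded register, so no MCInst operand is added for it here.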
8008 continue; 8009 } 8010 8011 if (IsDPP8) { 8012 if (Op.isDPP8()) { 8013 Op.addImmOperands(Inst, 1); 8014 } else if (HasModifiers && 8015 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8016 Op.addRegWithFPInputModsOperands(Inst, 2); 8017 } else if (Op.isFI()) { 8018 Fi = Op.getImm(); 8019 } else if (Op.isReg()) { 8020 Op.addRegOperands(Inst, 1); 8021 } else { 8022 llvm_unreachable("Invalid operand type"); 8023 } 8024 } else { 8025 if (HasModifiers && 8026 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8027 Op.addRegWithFPInputModsOperands(Inst, 2); 8028 } else if (Op.isReg()) { 8029 Op.addRegOperands(Inst, 1); 8030 } else if (Op.isDPPCtrl()) { 8031 Op.addImmOperands(Inst, 1); 8032 } else if (Op.isImm()) { 8033 // Handle optional arguments 8034 OptionalIdx[Op.getImmTy()] = I; 8035 } else { 8036 llvm_unreachable("Invalid operand type"); 8037 } 8038 } 8039 } 8040 8041 if (IsDPP8) { 8042 using namespace llvm::AMDGPU::DPP; 8043 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8044 } else { 8045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8048 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8050 } 8051 } 8052 } 8053 8054 //===----------------------------------------------------------------------===// 8055 // sdwa 8056 //===----------------------------------------------------------------------===// 8057 8058 OperandMatchResultTy 8059 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8060 AMDGPUOperand::ImmTy Type) { 8061 using namespace llvm::AMDGPU::SDWA; 8062 8063 SMLoc S = getLoc(); 8064 StringRef Value; 8065 OperandMatchResultTy res; 8066 8067 SMLoc StringLoc; 8068 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8069 if (res != MatchOperand_Success) { 8070 return res; 8071 } 8072 8073 int64_t Int; 8074 Int = StringSwitch<int64_t>(Value) 8075 .Case("BYTE_0", SdwaSel::BYTE_0) 8076 .Case("BYTE_1", SdwaSel::BYTE_1) 8077 .Case("BYTE_2", SdwaSel::BYTE_2) 8078 .Case("BYTE_3", SdwaSel::BYTE_3) 8079 .Case("WORD_0", SdwaSel::WORD_0) 8080 .Case("WORD_1", SdwaSel::WORD_1) 8081 .Case("DWORD", SdwaSel::DWORD) 8082 .Default(0xffffffff); 8083 8084 if (Int == 0xffffffff) { 8085 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8086 return MatchOperand_ParseFail; 8087 } 8088 8089 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8090 return MatchOperand_Success; 8091 } 8092 8093 OperandMatchResultTy 8094 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8095 using namespace llvm::AMDGPU::SDWA; 8096 8097 SMLoc S = getLoc(); 8098 StringRef Value; 8099 OperandMatchResultTy res; 8100 8101 SMLoc StringLoc; 8102 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8103 if (res != MatchOperand_Success) { 8104 return res; 8105 } 8106 8107 int64_t Int; 8108 Int = StringSwitch<int64_t>(Value) 8109 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8110 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8111 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8112 .Default(0xffffffff); 8113 8114 if (Int == 0xffffffff) { 8115 Error(StringLoc, "invalid dst_unused value"); 8116 return MatchOperand_ParseFail; 8117 } 8118 8119 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8120 return MatchOperand_Success; 8121 } 8122 8123 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8124 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8125 } 8126 8127 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8128 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8129 } 8130 8131 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8132 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8133 } 8134 8135 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8136 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8137 } 8138 8139 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8140 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8141 } 8142 8143 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8144 uint64_t BasicInstType, 8145 bool SkipDstVcc, 8146 bool SkipSrcVcc) { 8147 using namespace llvm::AMDGPU::SDWA; 8148 8149 OptionalImmIndexMap OptionalIdx; 8150 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8151 bool SkippedVcc = false; 8152 8153 unsigned I = 1; 8154 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8155 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8156 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8157 } 8158 8159 for (unsigned E = Operands.size(); I != E; ++I) { 8160 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8161 if (SkipVcc && !SkippedVcc && Op.isReg() && 8162 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8163 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8164 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8165 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8166 // Skip VCC only if we didn't skip it on previous iteration. 8167 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
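  // NOTE (clarifying comment added; the operand layout is inferred from the checks below): when the written 'vcc' is the 2nd asm operand, only the dst has been added so far (getNumOperands() == 1); when it is the trailing carry-in, the dst plus two modifier/register pairs have been added (getNumOperands() == 5); for a VOPC nothing has been added yet (getNumOperands() == 0).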
8168 if (BasicInstType == SIInstrFlags::VOP2 && 8169 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8170 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8171 SkippedVcc = true; 8172 continue; 8173 } else if (BasicInstType == SIInstrFlags::VOPC && 8174 Inst.getNumOperands() == 0) { 8175 SkippedVcc = true; 8176 continue; 8177 } 8178 } 8179 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8180 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8181 } else if (Op.isImm()) { 8182 // Handle optional arguments 8183 OptionalIdx[Op.getImmTy()] = I; 8184 } else { 8185 llvm_unreachable("Invalid operand type"); 8186 } 8187 SkippedVcc = false; 8188 } 8189 8190 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8191 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8192 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8193 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments 8194 switch (BasicInstType) { 8195 case SIInstrFlags::VOP1: 8196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8197 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8199 } 8200 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8201 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8203 break; 8204 8205 case SIInstrFlags::VOP2: 8206 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8207 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8208 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8209 } 8210 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8211 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8212 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8213 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8214 break; 8215 8216 case SIInstrFlags::VOPC: 8217 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8221 break; 8222 8223 default: 8224 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 8225 } 8226 } 8227 8228 // Special case v_mac_{f16, f32}: 8229 // it has a src2 register operand that is tied to the dst operand 8230 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8231 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8232 auto it = Inst.begin(); 8233 std::advance( 8234 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8235 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8236 } 8237 } 8238 8239 //===----------------------------------------------------------------------===// 8240 // mAI 8241 //===----------------------------------------------------------------------===// 8242 8243 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8244 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8245 } 8246 8247 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8248 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8249 } 8250 8251 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8252 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8253 } 8254 8255 /// Force static initialization. 8256 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8257 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8258 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8259 } 8260 8261 #define GET_REGISTER_MATCHER 8262 #define GET_MATCHER_IMPLEMENTATION 8263 #define GET_MNEMONIC_SPELL_CHECKER 8264 #define GET_MNEMONIC_CHECKER 8265 #include "AMDGPUGenAsmMatcher.inc" 8266 8267 // This function should be defined after the auto-generated include so that we 8268 // have the MatchClassKind enum defined 8269 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8270 unsigned Kind) { 8271 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8272 // But MatchInstructionImpl() expects to meet a token and fails to validate 8273 // the operand. This method checks if we are given an immediate operand but 8274 // expect to get the corresponding token. 8275 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8276 switch (Kind) { 8277 case MCK_addr64: 8278 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8279 case MCK_gds: 8280 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8281 case MCK_lds: 8282 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8283 case MCK_idxen: 8284 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8285 case MCK_offen: 8286 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8287 case MCK_SSrcB32: 8288 // When operands have expression values, they will return true for isToken, 8289 // because it is not possible to distinguish between a token and an 8290 // expression at parse time. MatchInstructionImpl() will always try to 8291 // match an operand as a token, when isToken returns true, and when the 8292 // name of the expression is not a valid token, the match will fail, 8293 // so we need to handle it here. 8294 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8295 case MCK_SSrcF32: 8296 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8297 case MCK_SoppBrTarget: 8298 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8299 case MCK_VReg32OrOff: 8300 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8301 case MCK_InterpSlot: 8302 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8303 case MCK_Attr: 8304 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8305 case MCK_AttrChan: 8306 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8307 case MCK_ImmSMEMOffset: 8308 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8309 case MCK_SReg_64: 8310 case MCK_SReg_64_XEXEC: 8311 // Null is defined as a 32-bit register but 8312 // it should also be enabled with 64-bit operands. 8313 // The following code enables it for SReg_64 operands 8314 // used as source and destination. Remaining source 8315 // operands are handled in isInlinableImm. 8316 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8317 default: 8318 return Match_InvalidOperand; 8319 } 8320 } 8321 8322 //===----------------------------------------------------------------------===// 8323 // endpgm 8324 //===----------------------------------------------------------------------===// 8325 8326 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8327 SMLoc S = getLoc(); 8328 int64_t Imm = 0; 8329 8330 if (!parseExpr(Imm)) { 8331 // The operand is optional, if not present default to 0 8332 Imm = 0; 8333 } 8334 8335 if (!isUInt<16>(Imm)) { 8336 Error(S, "expected a 16-bit value"); 8337 return MatchOperand_ParseFail; 8338 } 8339 8340 Operands.push_back( 8341 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8342 return MatchOperand_Success; 8343 } 8344 8345 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8346
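// NOTE (illustrative usage added for clarity; the syntax is an assumption): both "s_endpgm" and "s_endpgm 1" assemble; parseEndpgmOp() above defaults the optional immediate to 0 and rejects values that do not fit in 16 bits.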