1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 88 return Operand; 89 } 90 91 int64_t getModifiersOperand() const { 92 assert(!(hasFPModifiers() && hasIntModifiers()) 93 && "fp and int modifiers should not be used simultaneously"); 94 if (hasFPModifiers()) { 95 return getFPModifiersOperand(); 96 } else if (hasIntModifiers()) { 97 return getIntModifiersOperand(); 98 } else { 99 return 0; 100 } 101 } 102 103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 104 }; 105 106 enum ImmTy { 107 ImmTyNone, 108 ImmTyGDS, 109 ImmTyLDS, 110 ImmTyOffen, 111 ImmTyIdxen, 112 ImmTyAddr64, 113 ImmTyOffset, 114 ImmTyInstOffset, 115 ImmTyOffset0, 116 ImmTyOffset1, 117 ImmTyCPol, 118 ImmTySWZ, 119 ImmTyTFE, 120 ImmTyD16, 121 ImmTyClampSI, 122 ImmTyOModSI, 123 ImmTyDPP8, 124 ImmTyDppCtrl, 125 ImmTyDppRowMask, 126 ImmTyDppBankMask, 127 ImmTyDppBoundCtrl, 128 ImmTyDppFi, 129 ImmTySdwaDstSel, 130 ImmTySdwaSrc0Sel, 131 ImmTySdwaSrc1Sel, 132 ImmTySdwaDstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTySwizzle, 155 ImmTyGprIdxMode, 156 ImmTyHigh, 157 ImmTyBLGP, 158 ImmTyCBSZ, 159 ImmTyABID, 160 ImmTyEndpgm, 161 }; 162 163 enum ImmKindTy { 164 ImmKindTyNone, 165 ImmKindTyLiteral, 166 ImmKindTyConst, 167 }; 168 169 private: 170 struct TokOp { 171 const char *Data; 172 unsigned Length; 173 }; 174 175 struct ImmOp { 176 int64_t Val; 177 ImmTy Type; 178 bool IsFPImm; 179 mutable ImmKindTy Kind; 180 Modifiers Mods; 181 }; 182 183 struct RegOp { 184 unsigned RegNo; 185 Modifiers Mods; 186 }; 187 188 union { 189 TokOp Tok; 190 ImmOp Imm; 191 RegOp Reg; 192 const MCExpr *Expr; 193 }; 194 195 public: 196 bool isToken() const override { 197 if (Kind == Token) 198 return true; 199 200 // When parsing operands, we can't always tell if something was meant to be 201 // a token, like 'gds', or an expression that references a global variable. 202 // In this case, we assume the string is an expression, and if we need to 203 // interpret it as a token, then we treat the symbol name as the token.
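    // For example, in "ds_write_b32 v1, v2 gds" the trailing "gds" may have been
    // parsed as an MCSymbolRefExpr; getToken() then recovers the spelling "gds"
    // from the symbol name so it can still be matched as a token.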
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
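    // The token text is not copied: Tok.Data / Tok.Length below simply alias
    // Str's underlying storage (normally the parser's source buffer), so Str
    // must outlive the operand. HasExplicitEncodingSize is accepted here but
    // not currently stored.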
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
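  /// A worked example (assuming the common pre-GFX10 allocation granules of
  /// 4 VGPRs and 8 SGPRs, and the usual "count minus one" block encoding):
  /// NextFreeVGPR = 10 rounds up to 12 registers, i.e. 3 granules, giving
  /// VGPRBlocks = 2; NextFreeSGPR = 20 rounds up to 24, giving SGPRBlocks = 2.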
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 
1227 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1228 const char *AssemblerDirectiveEnd, 1229 std::string &CollectString); 1230 1231 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1232 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1233 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1234 unsigned &RegNum, unsigned &RegWidth, 1235 bool RestoreOnFailure = false); 1236 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1237 unsigned &RegNum, unsigned &RegWidth, 1238 SmallVectorImpl<AsmToken> &Tokens); 1239 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1240 unsigned &RegWidth, 1241 SmallVectorImpl<AsmToken> &Tokens); 1242 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1243 unsigned &RegWidth, 1244 SmallVectorImpl<AsmToken> &Tokens); 1245 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1246 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1247 bool ParseRegRange(unsigned& Num, unsigned& Width); 1248 unsigned getRegularReg(RegisterKind RegKind, 1249 unsigned RegNum, 1250 unsigned RegWidth, 1251 SMLoc Loc); 1252 1253 bool isRegister(); 1254 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1255 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1256 void initializeGprCountSymbol(RegisterKind RegKind); 1257 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1258 unsigned RegWidth); 1259 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1260 bool IsAtomic, bool IsLds = false); 1261 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1262 bool IsGdsHardcoded); 1263 1264 public: 1265 enum AMDGPUMatchResultTy { 1266 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1267 }; 1268 enum OperandMode { 1269 OperandMode_Default, 1270 OperandMode_NSA, 1271 }; 1272 1273 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1274 1275 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1276 const MCInstrInfo &MII, 1277 const MCTargetOptions &Options) 1278 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1279 MCAsmParserExtension::Initialize(Parser); 1280 1281 if (getFeatureBits().none()) { 1282 // Set default features. 1283 copySTI().ToggleFeature("southern-islands"); 1284 } 1285 1286 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1287 1288 { 1289 // TODO: make those pre-defined variables read-only. 1290 // Currently there is none suitable machinery in the core llvm-mc for this. 1291 // MCSymbol::isRedefinable is intended for another purpose, and 1292 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
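      // Illustrative (hypothetical) use from an assembly source: these symbols
      // let code be made conditional on the target, e.g.
      //   .if .amdgcn.gfx_generation_number >= 9
      //   .endif
      // (the .option.machine_version_* symbols play the same role in the other
      // branch below).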
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1333 1334 bool isSI() const { 1335 return AMDGPU::isSI(getSTI()); 1336 } 1337 1338 bool isCI() const { 1339 return AMDGPU::isCI(getSTI()); 1340 } 1341 1342 bool isVI() const { 1343 return AMDGPU::isVI(getSTI()); 1344 } 1345 1346 bool isGFX9() const { 1347 return AMDGPU::isGFX9(getSTI()); 1348 } 1349 1350 bool isGFX90A() const { 1351 return AMDGPU::isGFX90A(getSTI()); 1352 } 1353 1354 bool isGFX9Plus() const { 1355 return AMDGPU::isGFX9Plus(getSTI()); 1356 } 1357 1358 bool isGFX10() const { 1359 return AMDGPU::isGFX10(getSTI()); 1360 } 1361 1362 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1363 1364 bool isGFX10_BEncoding() const { 1365 return AMDGPU::isGFX10_BEncoding(getSTI()); 1366 } 1367 1368 bool hasInv2PiInlineImm() const { 1369 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1370 } 1371 1372 bool hasFlatOffsets() const { 1373 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1374 } 1375 1376 bool hasArchitectedFlatScratch() const { 1377 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1378 } 1379 1380 bool hasSGPR102_SGPR103() const { 1381 return !isVI() && !isGFX9(); 1382 } 1383 1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1385 1386 bool hasIntClamp() const { 1387 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1388 } 1389 1390 AMDGPUTargetStreamer &getTargetStreamer() { 1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1392 return static_cast<AMDGPUTargetStreamer &>(TS); 1393 } 1394 1395 const MCRegisterInfo *getMRI() const { 1396 // We need this const_cast because for some reason getContext() is not const 1397 // in MCAsmParser. 
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1399 } 1400 1401 const MCInstrInfo *getMII() const { 1402 return &MII; 1403 } 1404 1405 const FeatureBitset &getFeatureBits() const { 1406 return getSTI().getFeatureBits(); 1407 } 1408 1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1412 1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1415 bool isForcedDPP() const { return ForcedDPP; } 1416 bool isForcedSDWA() const { return ForcedSDWA; } 1417 ArrayRef<unsigned> getMatchedVariants() const; 1418 StringRef getMatchedVariantName() const; 1419 1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1422 bool RestoreOnFailure); 1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1425 SMLoc &EndLoc) override; 1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1428 unsigned Kind) override; 1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1430 OperandVector &Operands, MCStreamer &Out, 1431 uint64_t &ErrorInfo, 1432 bool MatchingInlineAsm) override; 1433 bool ParseDirective(AsmToken DirectiveID) override; 1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1435 OperandMode Mode = OperandMode_Default); 1436 StringRef parseMnemonicSuffix(StringRef Name); 1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1438 SMLoc NameLoc, OperandVector &Operands) override; 1439 //bool ProcessInstruction(MCInst &Inst); 1440 1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1442 1443 OperandMatchResultTy 1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t &) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseOperandArrayWithPrefix(const char *Prefix, 1450 OperandVector &Operands, 1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1452 bool (*ConvertResult)(int64_t&) = nullptr); 1453 1454 OperandMatchResultTy 1455 parseNamedBit(StringRef Name, OperandVector &Operands, 1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1457 OperandMatchResultTy parseCPol(OperandVector &Operands); 1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1459 StringRef &Value, 1460 SMLoc &StringLoc); 1461 1462 bool isModifier(); 1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1467 bool parseSP3NegModifier(); 1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1469 OperandMatchResultTy parseReg(OperandVector &Operands); 1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1477 OperandMatchResultTy parseUfmt(int64_t &Format); 1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1482 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1485 1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1489 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1490 1491 bool parseCnt(int64_t &IntVal); 1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1493 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1494 1495 private: 1496 struct OperandInfoTy { 1497 SMLoc Loc; 1498 int64_t Id; 1499 bool IsSymbolic = false; 1500 bool IsDefined = false; 1501 1502 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1503 }; 1504 1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1506 bool validateSendMsg(const OperandInfoTy &Msg, 1507 const OperandInfoTy &Op, 1508 const OperandInfoTy &Stream); 1509 1510 bool parseHwregBody(OperandInfoTy &HwReg, 1511 OperandInfoTy &Offset, 1512 OperandInfoTy &Width); 1513 bool validateHwreg(const OperandInfoTy &HwReg, 1514 const OperandInfoTy &Offset, 1515 const OperandInfoTy &Width); 1516 1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1519 1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1521 const OperandVector &Operands) const; 1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1524 SMLoc getLitLoc(const OperandVector &Operands) const; 1525 SMLoc getConstLoc(const OperandVector &Operands) const; 1526 1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1530 bool validateSOPLiteral(const MCInst &Inst) const; 1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateIntClampSupported(const MCInst &Inst); 1534 bool validateMIMGAtomicDMask(const MCInst &Inst); 1535 bool validateMIMGGatherDMask(const MCInst &Inst); 1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateMIMGDataSize(const MCInst &Inst); 1538 bool validateMIMGAddrSize(const 
MCInst &Inst); 1539 bool validateMIMGD16(const MCInst &Inst); 1540 bool validateMIMGDim(const MCInst &Inst); 1541 bool validateMIMGMSAA(const MCInst &Inst); 1542 bool validateOpSel(const MCInst &Inst); 1543 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1544 bool validateVccOperand(unsigned Reg) const; 1545 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1546 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1547 bool validateAGPRLdSt(const MCInst &Inst) const; 1548 bool validateVGPRAlign(const MCInst &Inst) const; 1549 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateDivScale(const MCInst &Inst); 1551 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1552 const SMLoc &IDLoc); 1553 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1554 unsigned getConstantBusLimit(unsigned Opcode) const; 1555 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1556 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1557 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1558 1559 bool isSupportedMnemo(StringRef Mnemo, 1560 const FeatureBitset &FBS); 1561 bool isSupportedMnemo(StringRef Mnemo, 1562 const FeatureBitset &FBS, 1563 ArrayRef<unsigned> Variants); 1564 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1565 1566 bool isId(const StringRef Id) const; 1567 bool isId(const AsmToken &Token, const StringRef Id) const; 1568 bool isToken(const AsmToken::TokenKind Kind) const; 1569 bool trySkipId(const StringRef Id); 1570 bool trySkipId(const StringRef Pref, const StringRef Id); 1571 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1572 bool trySkipToken(const AsmToken::TokenKind Kind); 1573 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1574 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1575 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1576 1577 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1578 AsmToken::TokenKind getTokenKind() const; 1579 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1580 bool parseExpr(OperandVector &Operands); 1581 StringRef getTokenStr() const; 1582 AsmToken peekToken(); 1583 AsmToken getToken() const; 1584 SMLoc getLoc() const; 1585 void lex(); 1586 1587 public: 1588 void onBeginOfFile() override; 1589 1590 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1591 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1592 1593 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1594 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1595 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1596 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1597 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1598 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1599 1600 bool parseSwizzleOperand(int64_t &Op, 1601 const unsigned MinVal, 1602 const unsigned MaxVal, 1603 const StringRef ErrMsg, 1604 SMLoc &Loc); 1605 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1606 const unsigned MinVal, 1607 const unsigned MaxVal, 1608 const StringRef ErrMsg); 1609 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1610 bool parseSwizzleOffset(int64_t &Imm); 1611 bool parseSwizzleMacro(int64_t &Imm); 1612 bool parseSwizzleQuadPerm(int64_t &Imm); 1613 bool parseSwizzleBitmaskPerm(int64_t 
&Imm); 1614 bool parseSwizzleBroadcast(int64_t &Imm); 1615 bool parseSwizzleSwap(int64_t &Imm); 1616 bool parseSwizzleReverse(int64_t &Imm); 1617 1618 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1619 int64_t parseGPRIdxMacro(); 1620 1621 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1622 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1623 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1624 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1625 1626 AMDGPUOperand::Ptr defaultCPol() const; 1627 1628 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1629 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1630 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1631 AMDGPUOperand::Ptr defaultFlatOffset() const; 1632 1633 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1634 1635 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1636 OptionalImmIndexMap &OptionalIdx); 1637 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1638 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1639 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1640 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1641 OptionalImmIndexMap &OptionalIdx); 1642 1643 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1644 1645 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1646 bool IsAtomic = false); 1647 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1648 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1649 1650 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1651 1652 bool parseDimId(unsigned &Encoding); 1653 OperandMatchResultTy parseDim(OperandVector &Operands); 1654 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1655 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1656 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1657 int64_t parseDPPCtrlSel(StringRef Ctrl); 1658 int64_t parseDPPCtrlPerm(); 1659 AMDGPUOperand::Ptr defaultRowMask() const; 1660 AMDGPUOperand::Ptr defaultBankMask() const; 1661 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1662 AMDGPUOperand::Ptr defaultFI() const; 1663 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1664 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1665 1666 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1667 AMDGPUOperand::ImmTy Type); 1668 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1669 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1670 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1671 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1672 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1673 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1675 uint64_t BasicInstType, 1676 bool SkipDstVcc = false, 1677 bool SkipSrcVcc = false); 1678 1679 AMDGPUOperand::Ptr defaultBLGP() const; 1680 AMDGPUOperand::Ptr defaultCBSZ() const; 1681 AMDGPUOperand::Ptr defaultABID() const; 1682 1683 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1684 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1685 }; 1686 1687 struct OptionalOperand { 1688 
const char *Name; 1689 AMDGPUOperand::ImmTy Type; 1690 bool IsBit; 1691 bool (*ConvertResult)(int64_t&); 1692 }; 1693 1694 } // end anonymous namespace 1695 1696 // May be called with integer type with equivalent bitwidth. 1697 static const fltSemantics *getFltSemantics(unsigned Size) { 1698 switch (Size) { 1699 case 4: 1700 return &APFloat::IEEEsingle(); 1701 case 8: 1702 return &APFloat::IEEEdouble(); 1703 case 2: 1704 return &APFloat::IEEEhalf(); 1705 default: 1706 llvm_unreachable("unsupported fp type"); 1707 } 1708 } 1709 1710 static const fltSemantics *getFltSemantics(MVT VT) { 1711 return getFltSemantics(VT.getSizeInBits() / 8); 1712 } 1713 1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1715 switch (OperandType) { 1716 case AMDGPU::OPERAND_REG_IMM_INT32: 1717 case AMDGPU::OPERAND_REG_IMM_FP32: 1718 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1719 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1720 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1722 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1723 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1725 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1726 return &APFloat::IEEEsingle(); 1727 case AMDGPU::OPERAND_REG_IMM_INT64: 1728 case AMDGPU::OPERAND_REG_IMM_FP64: 1729 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1730 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1732 return &APFloat::IEEEdouble(); 1733 case AMDGPU::OPERAND_REG_IMM_INT16: 1734 case AMDGPU::OPERAND_REG_IMM_FP16: 1735 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1736 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1737 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1738 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1739 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1740 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1741 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1742 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1743 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1744 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1745 return &APFloat::IEEEhalf(); 1746 default: 1747 llvm_unreachable("unsupported fp type"); 1748 } 1749 } 1750 1751 //===----------------------------------------------------------------------===// 1752 // Operand 1753 //===----------------------------------------------------------------------===// 1754 1755 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1756 bool Lost; 1757 1758 // Convert literal to single precision 1759 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1760 APFloat::rmNearestTiesToEven, 1761 &Lost); 1762 // We allow precision lost but not overflow or underflow 1763 if (Status != APFloat::opOK && 1764 Lost && 1765 ((Status & APFloat::opOverflow) != 0 || 1766 (Status & APFloat::opUnderflow) != 0)) { 1767 return false; 1768 } 1769 1770 return true; 1771 } 1772 1773 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1774 return isUIntN(Size, Val) || isIntN(Size, Val); 1775 } 1776 1777 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1778 if (VT.getScalarType() == MVT::i16) { 1779 // FP immediate values are broken. 1780 return isInlinableIntLiteral(Val); 1781 } 1782 1783 // f16/v2f16 operands work correctly for all values. 1784 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1785 } 1786 1787 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1788 1789 // This is a hack to enable named inline values like 1790 // shared_base with both 32-bit and 64-bit operands. 
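  // (For reference: "named inline values" means operands such as
  // src_shared_base or src_private_base, which can stand in wherever an
  // inline constant is accepted.)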
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal are set to zero, but these literals
    // are still accepted.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 :
                     (type == MVT::v2f32) ?
MVT::f32 : type; 1893 1894 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1895 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1896 } 1897 1898 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1899 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1900 } 1901 1902 bool AMDGPUOperand::isVRegWithInputMods() const { 1903 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1904 // GFX90A allows DPP on 64-bit operands. 1905 (isRegClass(AMDGPU::VReg_64RegClassID) && 1906 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1907 } 1908 1909 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1910 if (AsmParser->isVI()) 1911 return isVReg32(); 1912 else if (AsmParser->isGFX9Plus()) 1913 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1914 else 1915 return false; 1916 } 1917 1918 bool AMDGPUOperand::isSDWAFP16Operand() const { 1919 return isSDWAOperand(MVT::f16); 1920 } 1921 1922 bool AMDGPUOperand::isSDWAFP32Operand() const { 1923 return isSDWAOperand(MVT::f32); 1924 } 1925 1926 bool AMDGPUOperand::isSDWAInt16Operand() const { 1927 return isSDWAOperand(MVT::i16); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAInt32Operand() const { 1931 return isSDWAOperand(MVT::i32); 1932 } 1933 1934 bool AMDGPUOperand::isBoolReg() const { 1935 auto FB = AsmParser->getFeatureBits(); 1936 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1937 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1938 } 1939 1940 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1941 { 1942 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1943 assert(Size == 2 || Size == 4 || Size == 8); 1944 1945 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1946 1947 if (Imm.Mods.Abs) { 1948 Val &= ~FpSignMask; 1949 } 1950 if (Imm.Mods.Neg) { 1951 Val ^= FpSignMask; 1952 } 1953 1954 return Val; 1955 } 1956 1957 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1958 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1959 Inst.getNumOperands())) { 1960 addLiteralImmOperand(Inst, Imm.Val, 1961 ApplyModifiers & 1962 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1963 } else { 1964 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1965 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1966 setImmKindNone(); 1967 } 1968 } 1969 1970 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1971 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1972 auto OpNum = Inst.getNumOperands(); 1973 // Check that this operand accepts literals 1974 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1975 1976 if (ApplyModifiers) { 1977 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1978 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1979 Val = applyInputFPModifiers(Val, Size); 1980 } 1981 1982 APInt Literal(64, Val); 1983 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1984 1985 if (Imm.IsFPImm) { // We got fp literal token 1986 switch (OpTy) { 1987 case AMDGPU::OPERAND_REG_IMM_INT64: 1988 case AMDGPU::OPERAND_REG_IMM_FP64: 1989 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1990 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1991 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1992 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1993 AsmParser->hasInv2PiInlineImm())) { 1994 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1995 setImmKindConst(); 1996 return; 1997 } 1998 1999 // Non-inlineable 2000 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2001 // For fp operands we check if low 32 bits are zeros 2002 if (Literal.getLoBits(32) != 0) { 2003 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2004 "Can't encode literal as exact 64-bit floating-point operand. " 2005 "Low 32-bits will be set to zero"); 2006 } 2007 2008 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2009 setImmKindLiteral(); 2010 return; 2011 } 2012 2013 // We don't allow fp literals in 64-bit integer instructions. It is 2014 // unclear how we should encode them. This case should be checked earlier 2015 // in predicate methods (isLiteralImm()) 2016 llvm_unreachable("fp literal in 64-bit integer instruction."); 2017 2018 case AMDGPU::OPERAND_REG_IMM_INT32: 2019 case AMDGPU::OPERAND_REG_IMM_FP32: 2020 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2021 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2023 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2024 case AMDGPU::OPERAND_REG_IMM_INT16: 2025 case AMDGPU::OPERAND_REG_IMM_FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2027 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2028 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2029 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2030 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2031 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2033 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2034 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2035 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2036 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2037 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2038 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2039 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2040 bool lost; 2041 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2042 // Convert literal to single precision 2043 FPLiteral.convert(*getOpFltSemantics(OpTy), 2044 APFloat::rmNearestTiesToEven, &lost); 2045 // We allow precision lost but not overflow or underflow. This should be 2046 // checked earlier in isLiteralImm() 2047 2048 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2049 Inst.addOperand(MCOperand::createImm(ImmVal)); 2050 setImmKindLiteral(); 2051 return; 2052 } 2053 default: 2054 llvm_unreachable("invalid operand size"); 2055 } 2056 2057 return; 2058 } 2059 2060 // We got int literal token. 2061 // Only sign extend inline immediates. 
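  // Illustrative examples (not exhaustive): with a 32-bit operand, -1 matches
  // an inline constant and is emitted as-is, while a value such as 0x1234 is
  // not inlinable and is emitted as a 32-bit literal (the low 32 bits of Val).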
2062 switch (OpTy) { 2063 case AMDGPU::OPERAND_REG_IMM_INT32: 2064 case AMDGPU::OPERAND_REG_IMM_FP32: 2065 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2067 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2069 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2070 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2071 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2072 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2073 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2074 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2075 if (isSafeTruncation(Val, 32) && 2076 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2077 AsmParser->hasInv2PiInlineImm())) { 2078 Inst.addOperand(MCOperand::createImm(Val)); 2079 setImmKindConst(); 2080 return; 2081 } 2082 2083 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2084 setImmKindLiteral(); 2085 return; 2086 2087 case AMDGPU::OPERAND_REG_IMM_INT64: 2088 case AMDGPU::OPERAND_REG_IMM_FP64: 2089 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2090 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2091 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2092 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2093 Inst.addOperand(MCOperand::createImm(Val)); 2094 setImmKindConst(); 2095 return; 2096 } 2097 2098 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2099 setImmKindLiteral(); 2100 return; 2101 2102 case AMDGPU::OPERAND_REG_IMM_INT16: 2103 case AMDGPU::OPERAND_REG_IMM_FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2105 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2106 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2107 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2108 if (isSafeTruncation(Val, 16) && 2109 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2110 AsmParser->hasInv2PiInlineImm())) { 2111 Inst.addOperand(MCOperand::createImm(Val)); 2112 setImmKindConst(); 2113 return; 2114 } 2115 2116 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2117 setImmKindLiteral(); 2118 return; 2119 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2122 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2123 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2124 assert(isSafeTruncation(Val, 16)); 2125 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2126 AsmParser->hasInv2PiInlineImm())); 2127 2128 Inst.addOperand(MCOperand::createImm(Val)); 2129 return; 2130 } 2131 default: 2132 llvm_unreachable("invalid operand size"); 2133 } 2134 } 2135 2136 template <unsigned Bitwidth> 2137 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2138 APInt Literal(64, Imm.Val); 2139 setImmKindNone(); 2140 2141 if (!Imm.IsFPImm) { 2142 // We got int literal token. 
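    // Truncate the 64-bit value to the low Bitwidth bits expected by the
    // KImm field.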
2143 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2144 return; 2145 } 2146 2147 bool Lost; 2148 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2149 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2150 APFloat::rmNearestTiesToEven, &Lost); 2151 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2152 } 2153 2154 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2155 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2156 } 2157 2158 static bool isInlineValue(unsigned Reg) { 2159 switch (Reg) { 2160 case AMDGPU::SRC_SHARED_BASE: 2161 case AMDGPU::SRC_SHARED_LIMIT: 2162 case AMDGPU::SRC_PRIVATE_BASE: 2163 case AMDGPU::SRC_PRIVATE_LIMIT: 2164 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2165 return true; 2166 case AMDGPU::SRC_VCCZ: 2167 case AMDGPU::SRC_EXECZ: 2168 case AMDGPU::SRC_SCC: 2169 return true; 2170 case AMDGPU::SGPR_NULL: 2171 return true; 2172 default: 2173 return false; 2174 } 2175 } 2176 2177 bool AMDGPUOperand::isInlineValue() const { 2178 return isRegKind() && ::isInlineValue(getReg()); 2179 } 2180 2181 //===----------------------------------------------------------------------===// 2182 // AsmParser 2183 //===----------------------------------------------------------------------===// 2184 2185 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2186 if (Is == IS_VGPR) { 2187 switch (RegWidth) { 2188 default: return -1; 2189 case 1: return AMDGPU::VGPR_32RegClassID; 2190 case 2: return AMDGPU::VReg_64RegClassID; 2191 case 3: return AMDGPU::VReg_96RegClassID; 2192 case 4: return AMDGPU::VReg_128RegClassID; 2193 case 5: return AMDGPU::VReg_160RegClassID; 2194 case 6: return AMDGPU::VReg_192RegClassID; 2195 case 8: return AMDGPU::VReg_256RegClassID; 2196 case 16: return AMDGPU::VReg_512RegClassID; 2197 case 32: return AMDGPU::VReg_1024RegClassID; 2198 } 2199 } else if (Is == IS_TTMP) { 2200 switch (RegWidth) { 2201 default: return -1; 2202 case 1: return AMDGPU::TTMP_32RegClassID; 2203 case 2: return AMDGPU::TTMP_64RegClassID; 2204 case 4: return AMDGPU::TTMP_128RegClassID; 2205 case 8: return AMDGPU::TTMP_256RegClassID; 2206 case 16: return AMDGPU::TTMP_512RegClassID; 2207 } 2208 } else if (Is == IS_SGPR) { 2209 switch (RegWidth) { 2210 default: return -1; 2211 case 1: return AMDGPU::SGPR_32RegClassID; 2212 case 2: return AMDGPU::SGPR_64RegClassID; 2213 case 3: return AMDGPU::SGPR_96RegClassID; 2214 case 4: return AMDGPU::SGPR_128RegClassID; 2215 case 5: return AMDGPU::SGPR_160RegClassID; 2216 case 6: return AMDGPU::SGPR_192RegClassID; 2217 case 8: return AMDGPU::SGPR_256RegClassID; 2218 case 16: return AMDGPU::SGPR_512RegClassID; 2219 } 2220 } else if (Is == IS_AGPR) { 2221 switch (RegWidth) { 2222 default: return -1; 2223 case 1: return AMDGPU::AGPR_32RegClassID; 2224 case 2: return AMDGPU::AReg_64RegClassID; 2225 case 3: return AMDGPU::AReg_96RegClassID; 2226 case 4: return AMDGPU::AReg_128RegClassID; 2227 case 5: return AMDGPU::AReg_160RegClassID; 2228 case 6: return AMDGPU::AReg_192RegClassID; 2229 case 8: return AMDGPU::AReg_256RegClassID; 2230 case 16: return AMDGPU::AReg_512RegClassID; 2231 case 32: return AMDGPU::AReg_1024RegClassID; 2232 } 2233 } 2234 return -1; 2235 } 2236 2237 static unsigned getSpecialRegForName(StringRef RegName) { 2238 return StringSwitch<unsigned>(RegName) 2239 .Case("exec", AMDGPU::EXEC) 2240 .Case("vcc", AMDGPU::VCC) 2241 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2242 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2243 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2244 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2245 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2246 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2247 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2248 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2249 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2250 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2251 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2252 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2253 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2254 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2255 .Case("m0", AMDGPU::M0) 2256 .Case("vccz", AMDGPU::SRC_VCCZ) 2257 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2258 .Case("execz", AMDGPU::SRC_EXECZ) 2259 .Case("src_execz", AMDGPU::SRC_EXECZ) 2260 .Case("scc", AMDGPU::SRC_SCC) 2261 .Case("src_scc", AMDGPU::SRC_SCC) 2262 .Case("tba", AMDGPU::TBA) 2263 .Case("tma", AMDGPU::TMA) 2264 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2265 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2266 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2267 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2268 .Case("vcc_lo", AMDGPU::VCC_LO) 2269 .Case("vcc_hi", AMDGPU::VCC_HI) 2270 .Case("exec_lo", AMDGPU::EXEC_LO) 2271 .Case("exec_hi", AMDGPU::EXEC_HI) 2272 .Case("tma_lo", AMDGPU::TMA_LO) 2273 .Case("tma_hi", AMDGPU::TMA_HI) 2274 .Case("tba_lo", AMDGPU::TBA_LO) 2275 .Case("tba_hi", AMDGPU::TBA_HI) 2276 .Case("pc", AMDGPU::PC_REG) 2277 .Case("null", AMDGPU::SGPR_NULL) 2278 .Default(AMDGPU::NoRegister); 2279 } 2280 2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2282 SMLoc &EndLoc, bool RestoreOnFailure) { 2283 auto R = parseRegister(); 2284 if (!R) return true; 2285 assert(R->isReg()); 2286 RegNo = R->getReg(); 2287 StartLoc = R->getStartLoc(); 2288 EndLoc = R->getEndLoc(); 2289 return false; 2290 } 2291 2292 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2293 SMLoc &EndLoc) { 2294 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2295 } 2296 2297 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2298 SMLoc &StartLoc, 2299 SMLoc &EndLoc) { 2300 bool Result = 2301 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2302 bool PendingErrors = getParser().hasPendingError(); 2303 getParser().clearPendingErrors(); 2304 if (PendingErrors) 2305 return MatchOperand_ParseFail; 2306 if (Result) 2307 return MatchOperand_NoMatch; 2308 return MatchOperand_Success; 2309 } 2310 2311 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2312 RegisterKind RegKind, unsigned Reg1, 2313 SMLoc Loc) { 2314 switch (RegKind) { 2315 case IS_SPECIAL: 2316 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2317 Reg = AMDGPU::EXEC; 2318 RegWidth = 2; 2319 return true; 2320 } 2321 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2322 Reg = AMDGPU::FLAT_SCR; 2323 RegWidth = 2; 2324 return true; 2325 } 2326 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2327 Reg = AMDGPU::XNACK_MASK; 2328 RegWidth = 2; 2329 return true; 2330 } 2331 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2332 Reg = AMDGPU::VCC; 2333 RegWidth = 2; 2334 return true; 2335 } 2336 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2337 Reg = AMDGPU::TBA; 2338 RegWidth = 2; 2339 return true; 2340 } 2341 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2342 Reg = AMDGPU::TMA; 2343 
RegWidth = 2; 2344 return true; 2345 } 2346 Error(Loc, "register does not fit in the list"); 2347 return false; 2348 case IS_VGPR: 2349 case IS_SGPR: 2350 case IS_AGPR: 2351 case IS_TTMP: 2352 if (Reg1 != Reg + RegWidth) { 2353 Error(Loc, "registers in a list must have consecutive indices"); 2354 return false; 2355 } 2356 RegWidth++; 2357 return true; 2358 default: 2359 llvm_unreachable("unexpected register kind"); 2360 } 2361 } 2362 2363 struct RegInfo { 2364 StringLiteral Name; 2365 RegisterKind Kind; 2366 }; 2367 2368 static constexpr RegInfo RegularRegisters[] = { 2369 {{"v"}, IS_VGPR}, 2370 {{"s"}, IS_SGPR}, 2371 {{"ttmp"}, IS_TTMP}, 2372 {{"acc"}, IS_AGPR}, 2373 {{"a"}, IS_AGPR}, 2374 }; 2375 2376 static bool isRegularReg(RegisterKind Kind) { 2377 return Kind == IS_VGPR || 2378 Kind == IS_SGPR || 2379 Kind == IS_TTMP || 2380 Kind == IS_AGPR; 2381 } 2382 2383 static const RegInfo* getRegularRegInfo(StringRef Str) { 2384 for (const RegInfo &Reg : RegularRegisters) 2385 if (Str.startswith(Reg.Name)) 2386 return &Reg; 2387 return nullptr; 2388 } 2389 2390 static bool getRegNum(StringRef Str, unsigned& Num) { 2391 return !Str.getAsInteger(10, Num); 2392 } 2393 2394 bool 2395 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2396 const AsmToken &NextToken) const { 2397 2398 // A list of consecutive registers: [s0,s1,s2,s3] 2399 if (Token.is(AsmToken::LBrac)) 2400 return true; 2401 2402 if (!Token.is(AsmToken::Identifier)) 2403 return false; 2404 2405 // A single register like s0 or a range of registers like s[0:1] 2406 2407 StringRef Str = Token.getString(); 2408 const RegInfo *Reg = getRegularRegInfo(Str); 2409 if (Reg) { 2410 StringRef RegName = Reg->Name; 2411 StringRef RegSuffix = Str.substr(RegName.size()); 2412 if (!RegSuffix.empty()) { 2413 unsigned Num; 2414 // A single register with an index: rXX 2415 if (getRegNum(RegSuffix, Num)) 2416 return true; 2417 } else { 2418 // A range of registers: r[XX:YY]. 2419 if (NextToken.is(AsmToken::LBrac)) 2420 return true; 2421 } 2422 } 2423 2424 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2425 } 2426 2427 bool 2428 AMDGPUAsmParser::isRegister() 2429 { 2430 return isRegister(getToken(), peekToken()); 2431 } 2432 2433 unsigned 2434 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2435 unsigned RegNum, 2436 unsigned RegWidth, 2437 SMLoc Loc) { 2438 2439 assert(isRegularReg(RegKind)); 2440 2441 unsigned AlignSize = 1; 2442 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2443 // SGPR and TTMP registers must be aligned. 2444 // Max required alignment is 4 dwords. 
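    // Illustrative examples: s[2:3] is an acceptable 64-bit pair, while
    // s[1:2] is rejected below because its first index is not a multiple of
    // the (capped) register width.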
2445 AlignSize = std::min(RegWidth, 4u); 2446 } 2447 2448 if (RegNum % AlignSize != 0) { 2449 Error(Loc, "invalid register alignment"); 2450 return AMDGPU::NoRegister; 2451 } 2452 2453 unsigned RegIdx = RegNum / AlignSize; 2454 int RCID = getRegClass(RegKind, RegWidth); 2455 if (RCID == -1) { 2456 Error(Loc, "invalid or unsupported register size"); 2457 return AMDGPU::NoRegister; 2458 } 2459 2460 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2461 const MCRegisterClass RC = TRI->getRegClass(RCID); 2462 if (RegIdx >= RC.getNumRegs()) { 2463 Error(Loc, "register index is out of range"); 2464 return AMDGPU::NoRegister; 2465 } 2466 2467 return RC.getRegister(RegIdx); 2468 } 2469 2470 bool 2471 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2472 int64_t RegLo, RegHi; 2473 if (!skipToken(AsmToken::LBrac, "missing register index")) 2474 return false; 2475 2476 SMLoc FirstIdxLoc = getLoc(); 2477 SMLoc SecondIdxLoc; 2478 2479 if (!parseExpr(RegLo)) 2480 return false; 2481 2482 if (trySkipToken(AsmToken::Colon)) { 2483 SecondIdxLoc = getLoc(); 2484 if (!parseExpr(RegHi)) 2485 return false; 2486 } else { 2487 RegHi = RegLo; 2488 } 2489 2490 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2491 return false; 2492 2493 if (!isUInt<32>(RegLo)) { 2494 Error(FirstIdxLoc, "invalid register index"); 2495 return false; 2496 } 2497 2498 if (!isUInt<32>(RegHi)) { 2499 Error(SecondIdxLoc, "invalid register index"); 2500 return false; 2501 } 2502 2503 if (RegLo > RegHi) { 2504 Error(FirstIdxLoc, "first register index should not exceed second index"); 2505 return false; 2506 } 2507 2508 Num = static_cast<unsigned>(RegLo); 2509 Width = (RegHi - RegLo) + 1; 2510 return true; 2511 } 2512 2513 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2514 unsigned &RegNum, unsigned &RegWidth, 2515 SmallVectorImpl<AsmToken> &Tokens) { 2516 assert(isToken(AsmToken::Identifier)); 2517 unsigned Reg = getSpecialRegForName(getTokenStr()); 2518 if (Reg) { 2519 RegNum = 0; 2520 RegWidth = 1; 2521 RegKind = IS_SPECIAL; 2522 Tokens.push_back(getToken()); 2523 lex(); // skip register name 2524 } 2525 return Reg; 2526 } 2527 2528 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2529 unsigned &RegNum, unsigned &RegWidth, 2530 SmallVectorImpl<AsmToken> &Tokens) { 2531 assert(isToken(AsmToken::Identifier)); 2532 StringRef RegName = getTokenStr(); 2533 auto Loc = getLoc(); 2534 2535 const RegInfo *RI = getRegularRegInfo(RegName); 2536 if (!RI) { 2537 Error(Loc, "invalid register name"); 2538 return AMDGPU::NoRegister; 2539 } 2540 2541 Tokens.push_back(getToken()); 2542 lex(); // skip register name 2543 2544 RegKind = RI->Kind; 2545 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2546 if (!RegSuffix.empty()) { 2547 // Single 32-bit register: vXX. 2548 if (!getRegNum(RegSuffix, RegNum)) { 2549 Error(Loc, "invalid register index"); 2550 return AMDGPU::NoRegister; 2551 } 2552 RegWidth = 1; 2553 } else { 2554 // Range of registers: v[XX:YY]. ":YY" is optional. 
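    // Illustrative examples: v[4:7] gives a 128-bit register starting at v4;
    // v[4] is parsed as v[4:4], i.e. a single 32-bit register.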
2555 if (!ParseRegRange(RegNum, RegWidth)) 2556 return AMDGPU::NoRegister; 2557 } 2558 2559 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2560 } 2561 2562 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2563 unsigned &RegWidth, 2564 SmallVectorImpl<AsmToken> &Tokens) { 2565 unsigned Reg = AMDGPU::NoRegister; 2566 auto ListLoc = getLoc(); 2567 2568 if (!skipToken(AsmToken::LBrac, 2569 "expected a register or a list of registers")) { 2570 return AMDGPU::NoRegister; 2571 } 2572 2573 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2574 2575 auto Loc = getLoc(); 2576 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2577 return AMDGPU::NoRegister; 2578 if (RegWidth != 1) { 2579 Error(Loc, "expected a single 32-bit register"); 2580 return AMDGPU::NoRegister; 2581 } 2582 2583 for (; trySkipToken(AsmToken::Comma); ) { 2584 RegisterKind NextRegKind; 2585 unsigned NextReg, NextRegNum, NextRegWidth; 2586 Loc = getLoc(); 2587 2588 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2589 NextRegNum, NextRegWidth, 2590 Tokens)) { 2591 return AMDGPU::NoRegister; 2592 } 2593 if (NextRegWidth != 1) { 2594 Error(Loc, "expected a single 32-bit register"); 2595 return AMDGPU::NoRegister; 2596 } 2597 if (NextRegKind != RegKind) { 2598 Error(Loc, "registers in a list must be of the same kind"); 2599 return AMDGPU::NoRegister; 2600 } 2601 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2602 return AMDGPU::NoRegister; 2603 } 2604 2605 if (!skipToken(AsmToken::RBrac, 2606 "expected a comma or a closing square bracket")) { 2607 return AMDGPU::NoRegister; 2608 } 2609 2610 if (isRegularReg(RegKind)) 2611 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2612 2613 return Reg; 2614 } 2615 2616 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2617 unsigned &RegNum, unsigned &RegWidth, 2618 SmallVectorImpl<AsmToken> &Tokens) { 2619 auto Loc = getLoc(); 2620 Reg = AMDGPU::NoRegister; 2621 2622 if (isToken(AsmToken::Identifier)) { 2623 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2624 if (Reg == AMDGPU::NoRegister) 2625 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2626 } else { 2627 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2628 } 2629 2630 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2631 if (Reg == AMDGPU::NoRegister) { 2632 assert(Parser.hasPendingError()); 2633 return false; 2634 } 2635 2636 if (!subtargetHasRegister(*TRI, Reg)) { 2637 if (Reg == AMDGPU::SGPR_NULL) { 2638 Error(Loc, "'null' operand is not supported on this GPU"); 2639 } else { 2640 Error(Loc, "register not available on this GPU"); 2641 } 2642 return false; 2643 } 2644 2645 return true; 2646 } 2647 2648 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2649 unsigned &RegNum, unsigned &RegWidth, 2650 bool RestoreOnFailure /*=false*/) { 2651 Reg = AMDGPU::NoRegister; 2652 2653 SmallVector<AsmToken, 1> Tokens; 2654 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2655 if (RestoreOnFailure) { 2656 while (!Tokens.empty()) { 2657 getLexer().UnLex(Tokens.pop_back_val()); 2658 } 2659 } 2660 return true; 2661 } 2662 return false; 2663 } 2664 2665 Optional<StringRef> 2666 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2667 switch (RegKind) { 2668 case IS_VGPR: 2669 return StringRef(".amdgcn.next_free_vgpr"); 2670 case IS_SGPR: 2671 return StringRef(".amdgcn.next_free_sgpr"); 2672 default: 2673 return None; 2674 } 2675 } 2676 2677 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2678 auto SymbolName = getGprCountSymbolName(RegKind); 2679 assert(SymbolName && "initializing invalid register kind"); 2680 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2681 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2682 } 2683 2684 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2685 unsigned DwordRegIndex, 2686 unsigned RegWidth) { 2687 // Symbols are only defined for GCN targets 2688 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2689 return true; 2690 2691 auto SymbolName = getGprCountSymbolName(RegKind); 2692 if (!SymbolName) 2693 return true; 2694 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2695 2696 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2697 int64_t OldCount; 2698 2699 if (!Sym->isVariable()) 2700 return !Error(getLoc(), 2701 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2702 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2703 return !Error( 2704 getLoc(), 2705 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2706 2707 if (OldCount <= NewMax) 2708 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2709 2710 return true; 2711 } 2712 2713 std::unique_ptr<AMDGPUOperand> 2714 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2715 const auto &Tok = getToken(); 2716 SMLoc StartLoc = Tok.getLoc(); 2717 SMLoc EndLoc = Tok.getEndLoc(); 2718 RegisterKind RegKind; 2719 unsigned Reg, RegNum, RegWidth; 2720 2721 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2722 return nullptr; 2723 } 2724 if (isHsaAbiVersion3Or4(&getSTI())) { 2725 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2726 return nullptr; 2727 } else 2728 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2729 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2730 } 2731 2732 OperandMatchResultTy 2733 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2734 // TODO: add syntactic sugar for 1/(2*PI) 2735 2736 assert(!isRegister()); 2737 assert(!isModifier()); 2738 2739 const auto& Tok = getToken(); 2740 const auto& NextTok = peekToken(); 2741 bool IsReal = Tok.is(AsmToken::Real); 2742 SMLoc S = getLoc(); 2743 bool Negate = false; 2744 2745 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2746 lex(); 2747 IsReal = true; 2748 Negate = true; 2749 } 2750 2751 if (IsReal) { 2752 // Floating-point expressions are not supported. 2753 // Can only allow floating-point literals with an 2754 // optional sign. 2755 2756 StringRef Num = getTokenStr(); 2757 lex(); 2758 2759 APFloat RealVal(APFloat::IEEEdouble()); 2760 auto roundMode = APFloat::rmNearestTiesToEven; 2761 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2762 return MatchOperand_ParseFail; 2763 } 2764 if (Negate) 2765 RealVal.changeSign(); 2766 2767 Operands.push_back( 2768 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2769 AMDGPUOperand::ImmTyNone, true)); 2770 2771 return MatchOperand_Success; 2772 2773 } else { 2774 int64_t IntVal; 2775 const MCExpr *Expr; 2776 SMLoc S = getLoc(); 2777 2778 if (HasSP3AbsModifier) { 2779 // This is a workaround for handling expressions 2780 // as arguments of SP3 'abs' modifier, for example: 2781 // |1.0| 2782 // |-1| 2783 // |1+x| 2784 // This syntax is not compatible with syntax of standard 2785 // MC expressions (due to the trailing '|'). 
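      // Using parsePrimaryExpr (rather than parseExpression) leaves the
      // trailing '|' unconsumed so the caller can close the SP3 abs modifier.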
2786 SMLoc EndLoc; 2787 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2788 return MatchOperand_ParseFail; 2789 } else { 2790 if (Parser.parseExpression(Expr)) 2791 return MatchOperand_ParseFail; 2792 } 2793 2794 if (Expr->evaluateAsAbsolute(IntVal)) { 2795 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2796 } else { 2797 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2798 } 2799 2800 return MatchOperand_Success; 2801 } 2802 2803 return MatchOperand_NoMatch; 2804 } 2805 2806 OperandMatchResultTy 2807 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2808 if (!isRegister()) 2809 return MatchOperand_NoMatch; 2810 2811 if (auto R = parseRegister()) { 2812 assert(R->isReg()); 2813 Operands.push_back(std::move(R)); 2814 return MatchOperand_Success; 2815 } 2816 return MatchOperand_ParseFail; 2817 } 2818 2819 OperandMatchResultTy 2820 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2821 auto res = parseReg(Operands); 2822 if (res != MatchOperand_NoMatch) { 2823 return res; 2824 } else if (isModifier()) { 2825 return MatchOperand_NoMatch; 2826 } else { 2827 return parseImm(Operands, HasSP3AbsMod); 2828 } 2829 } 2830 2831 bool 2832 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2833 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2834 const auto &str = Token.getString(); 2835 return str == "abs" || str == "neg" || str == "sext"; 2836 } 2837 return false; 2838 } 2839 2840 bool 2841 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2842 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2843 } 2844 2845 bool 2846 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2847 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2848 } 2849 2850 bool 2851 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2852 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2853 } 2854 2855 // Check if this is an operand modifier or an opcode modifier 2856 // which may look like an expression but it is not. We should 2857 // avoid parsing these modifiers as expressions. Currently 2858 // recognized sequences are: 2859 // |...| 2860 // abs(...) 2861 // neg(...) 2862 // sext(...) 2863 // -reg 2864 // -|...| 2865 // -abs(...) 2866 // name:... 2867 // Note that simple opcode modifiers like 'gds' may be parsed as 2868 // expressions; this is a special case. See getExpressionAsToken. 2869 // 2870 bool 2871 AMDGPUAsmParser::isModifier() { 2872 2873 AsmToken Tok = getToken(); 2874 AsmToken NextToken[2]; 2875 peekTokens(NextToken); 2876 2877 return isOperandModifier(Tok, NextToken[0]) || 2878 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2879 isOpcodeModifierWithVal(Tok, NextToken[0]); 2880 } 2881 2882 // Check if the current token is an SP3 'neg' modifier. 2883 // Currently this modifier is allowed in the following context: 2884 // 2885 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2886 // 2. Before an 'abs' modifier: -abs(...) 2887 // 3. Before an SP3 'abs' modifier: -|...| 2888 // 2889 // In all other cases "-" is handled as a part 2890 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// it is interpreted as integer negation rather than
// a floating-point NEG modifier applied to the literal.
// Besides being counterintuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs) ?
MatchOperand_ParseFail : Res; 2964 } 2965 2966 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2967 return MatchOperand_ParseFail; 2968 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2969 return MatchOperand_ParseFail; 2970 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2971 return MatchOperand_ParseFail; 2972 2973 AMDGPUOperand::Modifiers Mods; 2974 Mods.Abs = Abs || SP3Abs; 2975 Mods.Neg = Neg || SP3Neg; 2976 2977 if (Mods.hasFPModifiers()) { 2978 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2979 if (Op.isExpr()) { 2980 Error(Op.getStartLoc(), "expected an absolute expression"); 2981 return MatchOperand_ParseFail; 2982 } 2983 Op.setModifiers(Mods); 2984 } 2985 return MatchOperand_Success; 2986 } 2987 2988 OperandMatchResultTy 2989 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2990 bool AllowImm) { 2991 bool Sext = trySkipId("sext"); 2992 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2993 return MatchOperand_ParseFail; 2994 2995 OperandMatchResultTy Res; 2996 if (AllowImm) { 2997 Res = parseRegOrImm(Operands); 2998 } else { 2999 Res = parseReg(Operands); 3000 } 3001 if (Res != MatchOperand_Success) { 3002 return Sext? MatchOperand_ParseFail : Res; 3003 } 3004 3005 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3006 return MatchOperand_ParseFail; 3007 3008 AMDGPUOperand::Modifiers Mods; 3009 Mods.Sext = Sext; 3010 3011 if (Mods.hasIntModifiers()) { 3012 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3013 if (Op.isExpr()) { 3014 Error(Op.getStartLoc(), "expected an absolute expression"); 3015 return MatchOperand_ParseFail; 3016 } 3017 Op.setModifiers(Mods); 3018 } 3019 3020 return MatchOperand_Success; 3021 } 3022 3023 OperandMatchResultTy 3024 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3025 return parseRegOrImmWithFPInputMods(Operands, false); 3026 } 3027 3028 OperandMatchResultTy 3029 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3030 return parseRegOrImmWithIntInputMods(Operands, false); 3031 } 3032 3033 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3034 auto Loc = getLoc(); 3035 if (trySkipId("off")) { 3036 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3037 AMDGPUOperand::ImmTyOff, false)); 3038 return MatchOperand_Success; 3039 } 3040 3041 if (!isRegister()) 3042 return MatchOperand_NoMatch; 3043 3044 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3045 if (Reg) { 3046 Operands.push_back(std::move(Reg)); 3047 return MatchOperand_Success; 3048 } 3049 3050 return MatchOperand_ParseFail; 3051 3052 } 3053 3054 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3055 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3056 3057 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3058 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3059 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3060 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3061 return Match_InvalidOperand; 3062 3063 if ((TSFlags & SIInstrFlags::VOP3) && 3064 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3065 getForcedEncodingSize() != 64) 3066 return Match_PreferE32; 3067 3068 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3069 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3070 // v_mac_f32/16 allow only dst_sel == DWORD; 3071 auto OpNum = 3072 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3073 const auto &Op = Inst.getOperand(OpNum); 3074 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3075 return Match_InvalidOperand; 3076 } 3077 } 3078 3079 return Match_Success; 3080 } 3081 3082 static ArrayRef<unsigned> getAllVariants() { 3083 static const unsigned Variants[] = { 3084 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3085 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3086 }; 3087 3088 return makeArrayRef(Variants); 3089 } 3090 3091 // What asm variants we should check 3092 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3093 if (getForcedEncodingSize() == 32) { 3094 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3095 return makeArrayRef(Variants); 3096 } 3097 3098 if (isForcedVOP3()) { 3099 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3100 return makeArrayRef(Variants); 3101 } 3102 3103 if (isForcedSDWA()) { 3104 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3105 AMDGPUAsmVariants::SDWA9}; 3106 return makeArrayRef(Variants); 3107 } 3108 3109 if (isForcedDPP()) { 3110 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3111 return makeArrayRef(Variants); 3112 } 3113 3114 return getAllVariants(); 3115 } 3116 3117 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3118 if (getForcedEncodingSize() == 32) 3119 return "e32"; 3120 3121 if (isForcedVOP3()) 3122 return "e64"; 3123 3124 if (isForcedSDWA()) 3125 return "sdwa"; 3126 3127 if (isForcedDPP()) 3128 return "dpp"; 3129 3130 return ""; 3131 } 3132 3133 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3134 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3135 const unsigned Num = Desc.getNumImplicitUses(); 3136 for (unsigned i = 0; i < Num; ++i) { 3137 unsigned Reg = Desc.ImplicitUses[i]; 3138 switch (Reg) { 3139 case AMDGPU::FLAT_SCR: 3140 case AMDGPU::VCC: 3141 case AMDGPU::VCC_LO: 3142 case AMDGPU::VCC_HI: 3143 case AMDGPU::M0: 3144 return Reg; 3145 default: 3146 break; 3147 } 3148 } 3149 return AMDGPU::NoRegister; 3150 } 3151 3152 // NB: This code is correct only when used to check constant 3153 // bus limitations because GFX7 support no f16 inline constants. 3154 // Note that there are no cases when a GFX7 opcode violates 3155 // constant bus limitations due to the use of an f16 constant. 
3156 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3157 unsigned OpIdx) const { 3158 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3159 3160 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3161 return false; 3162 } 3163 3164 const MCOperand &MO = Inst.getOperand(OpIdx); 3165 3166 int64_t Val = MO.getImm(); 3167 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3168 3169 switch (OpSize) { // expected operand size 3170 case 8: 3171 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3172 case 4: 3173 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3174 case 2: { 3175 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3176 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3177 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3178 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3179 return AMDGPU::isInlinableIntLiteral(Val); 3180 3181 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3182 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3183 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3184 return AMDGPU::isInlinableIntLiteralV216(Val); 3185 3186 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3187 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3188 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3189 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3190 3191 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3192 } 3193 default: 3194 llvm_unreachable("invalid operand size"); 3195 } 3196 } 3197 3198 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3199 if (!isGFX10Plus()) 3200 return 1; 3201 3202 switch (Opcode) { 3203 // 64-bit shift instructions can use only one scalar value input 3204 case AMDGPU::V_LSHLREV_B64_e64: 3205 case AMDGPU::V_LSHLREV_B64_gfx10: 3206 case AMDGPU::V_LSHRREV_B64_e64: 3207 case AMDGPU::V_LSHRREV_B64_gfx10: 3208 case AMDGPU::V_ASHRREV_I64_e64: 3209 case AMDGPU::V_ASHRREV_I64_gfx10: 3210 case AMDGPU::V_LSHL_B64_e64: 3211 case AMDGPU::V_LSHR_B64_e64: 3212 case AMDGPU::V_ASHR_I64_e64: 3213 return 1; 3214 default: 3215 return 2; 3216 } 3217 } 3218 3219 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3220 const MCOperand &MO = Inst.getOperand(OpIdx); 3221 if (MO.isImm()) { 3222 return !isInlineConstant(Inst, OpIdx); 3223 } else if (MO.isReg()) { 3224 auto Reg = MO.getReg(); 3225 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3226 auto PReg = mc2PseudoReg(Reg); 3227 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3228 } else { 3229 return true; 3230 } 3231 } 3232 3233 bool 3234 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3235 const OperandVector &Operands) { 3236 const unsigned Opcode = Inst.getOpcode(); 3237 const MCInstrDesc &Desc = MII.get(Opcode); 3238 unsigned LastSGPR = AMDGPU::NoRegister; 3239 unsigned ConstantBusUseCount = 0; 3240 unsigned NumLiterals = 0; 3241 unsigned LiteralSize; 3242 3243 if (Desc.TSFlags & 3244 (SIInstrFlags::VOPC | 3245 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3246 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3247 SIInstrFlags::SDWA)) { 3248 // Check special imm operands (used by madmk, etc) 3249 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3250 ++ConstantBusUseCount; 3251 } 3252 3253 SmallDenseSet<unsigned> SGPRsUsed; 3254 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3255 if (SGPRUsed != AMDGPU::NoRegister) { 3256 SGPRsUsed.insert(SGPRUsed); 3257 ++ConstantBusUseCount; 3258 } 3259 3260 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersection like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4)
            Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3318 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3319 return false; 3320 } 3321 3322 bool 3323 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3324 const OperandVector &Operands) { 3325 const unsigned Opcode = Inst.getOpcode(); 3326 const MCInstrDesc &Desc = MII.get(Opcode); 3327 3328 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3329 if (DstIdx == -1 || 3330 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3331 return true; 3332 } 3333 3334 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3335 3336 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3337 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3338 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3339 3340 assert(DstIdx != -1); 3341 const MCOperand &Dst = Inst.getOperand(DstIdx); 3342 assert(Dst.isReg()); 3343 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3344 3345 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3346 3347 for (int SrcIdx : SrcIndices) { 3348 if (SrcIdx == -1) break; 3349 const MCOperand &Src = Inst.getOperand(SrcIdx); 3350 if (Src.isReg()) { 3351 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3352 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3353 Error(getRegLoc(SrcReg, Operands), 3354 "destination must be different than all sources"); 3355 return false; 3356 } 3357 } 3358 } 3359 3360 return true; 3361 } 3362 3363 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3364 3365 const unsigned Opc = Inst.getOpcode(); 3366 const MCInstrDesc &Desc = MII.get(Opc); 3367 3368 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3369 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3370 assert(ClampIdx != -1); 3371 return Inst.getOperand(ClampIdx).getImm() == 0; 3372 } 3373 3374 return true; 3375 } 3376 3377 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3378 3379 const unsigned Opc = Inst.getOpcode(); 3380 const MCInstrDesc &Desc = MII.get(Opc); 3381 3382 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3383 return true; 3384 3385 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3386 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3387 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3388 3389 assert(VDataIdx != -1); 3390 3391 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3392 return true; 3393 3394 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3395 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3396 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3397 if (DMask == 0) 3398 DMask = 1; 3399 3400 unsigned DataSize = 3401 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3402 if (hasPackedD16()) { 3403 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3404 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3405 DataSize = (DataSize + 1) / 2; 3406 } 3407 3408 return (VDataSize / 4) == DataSize + TFESize; 3409 } 3410 3411 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3412 const unsigned Opc = Inst.getOpcode(); 3413 const MCInstrDesc &Desc = MII.get(Opc); 3414 3415 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3416 return true; 3417 3418 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3419 3420 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3421 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3422 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3423 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3424 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3425 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3426 3427 assert(VAddr0Idx != -1); 3428 assert(SrsrcIdx != -1); 3429 assert(SrsrcIdx > VAddr0Idx); 3430 3431 if (DimIdx == -1) 3432 return true; // intersect_ray 3433 3434 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3435 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3436 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3437 unsigned ActualAddrSize = 3438 IsNSA ? SrsrcIdx - VAddr0Idx 3439 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3440 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3441 3442 unsigned ExpectedAddrSize = 3443 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3444 3445 if (!IsNSA) { 3446 if (ExpectedAddrSize > 8) 3447 ExpectedAddrSize = 16; 3448 else if (ExpectedAddrSize > 5) 3449 ExpectedAddrSize = 8; 3450 3451 // Allow oversized 8 VGPR vaddr when only 5 VGPR are required. 3452 // This provides backward compatibility for assembly created 3453 // before 160b types were directly supported. 3454 if (ExpectedAddrSize == 5 && ActualAddrSize == 8) 3455 return true; 3456 } 3457 3458 return ActualAddrSize == ExpectedAddrSize; 3459 } 3460 3461 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3462 3463 const unsigned Opc = Inst.getOpcode(); 3464 const MCInstrDesc &Desc = MII.get(Opc); 3465 3466 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3467 return true; 3468 if (!Desc.mayLoad() || !Desc.mayStore()) 3469 return true; // Not atomic 3470 3471 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3472 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3473 3474 // This is an incomplete check because image_atomic_cmpswap 3475 // may only use 0x3 and 0xf while other atomic operations 3476 // may use 0x1 and 0x3. However these limitations are 3477 // verified when we check that dmask matches dst size. 3478 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3479 } 3480 3481 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3482 3483 const unsigned Opc = Inst.getOpcode(); 3484 const MCInstrDesc &Desc = MII.get(Opc); 3485 3486 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3487 return true; 3488 3489 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3490 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3491 3492 // GATHER4 instructions use dmask in a different fashion compared to 3493 // other MIMG instructions. The only useful DMASK values are 3494 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 
1 returns 3495 // (red,red,red,red) etc.) The ISA document doesn't mention 3496 // this. 3497 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3498 } 3499 3500 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3501 const unsigned Opc = Inst.getOpcode(); 3502 const MCInstrDesc &Desc = MII.get(Opc); 3503 3504 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3505 return true; 3506 3507 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3508 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3509 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3510 3511 if (!BaseOpcode->MSAA) 3512 return true; 3513 3514 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3515 assert(DimIdx != -1); 3516 3517 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3518 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3519 3520 return DimInfo->MSAA; 3521 } 3522 3523 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3524 { 3525 switch (Opcode) { 3526 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3527 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3528 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3529 return true; 3530 default: 3531 return false; 3532 } 3533 } 3534 3535 // movrels* opcodes should only allow VGPRS as src0. 3536 // This is specified in .td description for vop1/vop3, 3537 // but sdwa is handled differently. See isSDWAOperand. 3538 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3539 const OperandVector &Operands) { 3540 3541 const unsigned Opc = Inst.getOpcode(); 3542 const MCInstrDesc &Desc = MII.get(Opc); 3543 3544 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3545 return true; 3546 3547 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3548 assert(Src0Idx != -1); 3549 3550 SMLoc ErrLoc; 3551 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3552 if (Src0.isReg()) { 3553 auto Reg = mc2PseudoReg(Src0.getReg()); 3554 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3555 if (!isSGPR(Reg, TRI)) 3556 return true; 3557 ErrLoc = getRegLoc(Reg, Operands); 3558 } else { 3559 ErrLoc = getConstLoc(Operands); 3560 } 3561 3562 Error(ErrLoc, "source operand must be a VGPR"); 3563 return false; 3564 } 3565 3566 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3567 const OperandVector &Operands) { 3568 3569 const unsigned Opc = Inst.getOpcode(); 3570 3571 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3572 return true; 3573 3574 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3575 assert(Src0Idx != -1); 3576 3577 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3578 if (!Src0.isReg()) 3579 return true; 3580 3581 auto Reg = mc2PseudoReg(Src0.getReg()); 3582 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3583 if (isSGPR(Reg, TRI)) { 3584 Error(getRegLoc(Reg, Operands), 3585 "source operand must be either a VGPR or an inline constant"); 3586 return false; 3587 } 3588 3589 return true; 3590 } 3591 3592 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3593 switch (Inst.getOpcode()) { 3594 default: 3595 return true; 3596 case V_DIV_SCALE_F32_gfx6_gfx7: 3597 case V_DIV_SCALE_F32_vi: 3598 case V_DIV_SCALE_F32_gfx10: 3599 case V_DIV_SCALE_F64_gfx6_gfx7: 3600 case V_DIV_SCALE_F64_vi: 3601 case V_DIV_SCALE_F64_gfx10: 3602 break; 3603 } 3604 3605 // TODO: Check that src0 = src1 or src2. 
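  // Reject any source that uses the ABS (|...|) modifier; the loop below
  // inspects the src*_modifiers operands directly.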
3606 3607 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3608 AMDGPU::OpName::src1_modifiers, 3609 AMDGPU::OpName::src2_modifiers}) { 3610 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3611 .getImm() & 3612 SISrcMods::ABS) { 3613 return false; 3614 } 3615 } 3616 3617 return true; 3618 } 3619 3620 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3621 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3626 return true; 3627 3628 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3629 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3630 if (isCI() || isSI()) 3631 return false; 3632 } 3633 3634 return true; 3635 } 3636 3637 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3638 const unsigned Opc = Inst.getOpcode(); 3639 const MCInstrDesc &Desc = MII.get(Opc); 3640 3641 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3642 return true; 3643 3644 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3645 if (DimIdx < 0) 3646 return true; 3647 3648 long Imm = Inst.getOperand(DimIdx).getImm(); 3649 if (Imm < 0 || Imm >= 8) 3650 return false; 3651 3652 return true; 3653 } 3654 3655 static bool IsRevOpcode(const unsigned Opcode) 3656 { 3657 switch (Opcode) { 3658 case AMDGPU::V_SUBREV_F32_e32: 3659 case AMDGPU::V_SUBREV_F32_e64: 3660 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3661 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3662 case AMDGPU::V_SUBREV_F32_e32_vi: 3663 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3664 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3665 case AMDGPU::V_SUBREV_F32_e64_vi: 3666 3667 case AMDGPU::V_SUBREV_CO_U32_e32: 3668 case AMDGPU::V_SUBREV_CO_U32_e64: 3669 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3670 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3671 3672 case AMDGPU::V_SUBBREV_U32_e32: 3673 case AMDGPU::V_SUBBREV_U32_e64: 3674 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3675 case AMDGPU::V_SUBBREV_U32_e32_vi: 3676 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3677 case AMDGPU::V_SUBBREV_U32_e64_vi: 3678 3679 case AMDGPU::V_SUBREV_U32_e32: 3680 case AMDGPU::V_SUBREV_U32_e64: 3681 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3682 case AMDGPU::V_SUBREV_U32_e32_vi: 3683 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3684 case AMDGPU::V_SUBREV_U32_e64_vi: 3685 3686 case AMDGPU::V_SUBREV_F16_e32: 3687 case AMDGPU::V_SUBREV_F16_e64: 3688 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3689 case AMDGPU::V_SUBREV_F16_e32_vi: 3690 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3691 case AMDGPU::V_SUBREV_F16_e64_vi: 3692 3693 case AMDGPU::V_SUBREV_U16_e32: 3694 case AMDGPU::V_SUBREV_U16_e64: 3695 case AMDGPU::V_SUBREV_U16_e32_vi: 3696 case AMDGPU::V_SUBREV_U16_e64_vi: 3697 3698 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3699 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3700 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3701 3702 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3703 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3704 3705 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3706 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3707 3708 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3709 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3710 3711 case AMDGPU::V_LSHRREV_B32_e32: 3712 case AMDGPU::V_LSHRREV_B32_e64: 3713 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3714 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3715 case AMDGPU::V_LSHRREV_B32_e32_vi: 3716 case AMDGPU::V_LSHRREV_B32_e64_vi: 3717 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3718 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3719 3720 case AMDGPU::V_ASHRREV_I32_e32: 3721 case
AMDGPU::V_ASHRREV_I32_e64: 3722 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3723 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3724 case AMDGPU::V_ASHRREV_I32_e32_vi: 3725 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3726 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3727 case AMDGPU::V_ASHRREV_I32_e64_vi: 3728 3729 case AMDGPU::V_LSHLREV_B32_e32: 3730 case AMDGPU::V_LSHLREV_B32_e64: 3731 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3732 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3733 case AMDGPU::V_LSHLREV_B32_e32_vi: 3734 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3735 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3736 case AMDGPU::V_LSHLREV_B32_e64_vi: 3737 3738 case AMDGPU::V_LSHLREV_B16_e32: 3739 case AMDGPU::V_LSHLREV_B16_e64: 3740 case AMDGPU::V_LSHLREV_B16_e32_vi: 3741 case AMDGPU::V_LSHLREV_B16_e64_vi: 3742 case AMDGPU::V_LSHLREV_B16_gfx10: 3743 3744 case AMDGPU::V_LSHRREV_B16_e32: 3745 case AMDGPU::V_LSHRREV_B16_e64: 3746 case AMDGPU::V_LSHRREV_B16_e32_vi: 3747 case AMDGPU::V_LSHRREV_B16_e64_vi: 3748 case AMDGPU::V_LSHRREV_B16_gfx10: 3749 3750 case AMDGPU::V_ASHRREV_I16_e32: 3751 case AMDGPU::V_ASHRREV_I16_e64: 3752 case AMDGPU::V_ASHRREV_I16_e32_vi: 3753 case AMDGPU::V_ASHRREV_I16_e64_vi: 3754 case AMDGPU::V_ASHRREV_I16_gfx10: 3755 3756 case AMDGPU::V_LSHLREV_B64_e64: 3757 case AMDGPU::V_LSHLREV_B64_gfx10: 3758 case AMDGPU::V_LSHLREV_B64_vi: 3759 3760 case AMDGPU::V_LSHRREV_B64_e64: 3761 case AMDGPU::V_LSHRREV_B64_gfx10: 3762 case AMDGPU::V_LSHRREV_B64_vi: 3763 3764 case AMDGPU::V_ASHRREV_I64_e64: 3765 case AMDGPU::V_ASHRREV_I64_gfx10: 3766 case AMDGPU::V_ASHRREV_I64_vi: 3767 3768 case AMDGPU::V_PK_LSHLREV_B16: 3769 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3770 case AMDGPU::V_PK_LSHLREV_B16_vi: 3771 3772 case AMDGPU::V_PK_LSHRREV_B16: 3773 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3774 case AMDGPU::V_PK_LSHRREV_B16_vi: 3775 case AMDGPU::V_PK_ASHRREV_I16: 3776 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3777 case AMDGPU::V_PK_ASHRREV_I16_vi: 3778 return true; 3779 default: 3780 return false; 3781 } 3782 } 3783 3784 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3785 3786 using namespace SIInstrFlags; 3787 const unsigned Opcode = Inst.getOpcode(); 3788 const MCInstrDesc &Desc = MII.get(Opcode); 3789 3790 // lds_direct register is defined so that it can be used 3791 // with 9-bit operands only. Ignore encodings which do not accept these. 
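// Within the encodings checked below, lds_direct is further restricted: it may
// only be used as src0, it is rejected for reverse-operand opcodes and SDWA
// forms, and it is not available on gfx90a.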
3792 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3793 if ((Desc.TSFlags & Enc) == 0) 3794 return None; 3795 3796 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3797 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3798 if (SrcIdx == -1) 3799 break; 3800 const auto &Src = Inst.getOperand(SrcIdx); 3801 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3802 3803 if (isGFX90A()) 3804 return StringRef("lds_direct is not supported on this GPU"); 3805 3806 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3807 return StringRef("lds_direct cannot be used with this instruction"); 3808 3809 if (SrcName != OpName::src0) 3810 return StringRef("lds_direct may be used as src0 only"); 3811 } 3812 } 3813 3814 return None; 3815 } 3816 3817 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3818 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3819 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3820 if (Op.isFlatOffset()) 3821 return Op.getStartLoc(); 3822 } 3823 return getLoc(); 3824 } 3825 3826 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3827 const OperandVector &Operands) { 3828 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3829 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3830 return true; 3831 3832 auto Opcode = Inst.getOpcode(); 3833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3834 assert(OpNum != -1); 3835 3836 const auto &Op = Inst.getOperand(OpNum); 3837 if (!hasFlatOffsets() && Op.getImm() != 0) { 3838 Error(getFlatOffsetLoc(Operands), 3839 "flat offset modifier is not supported on this GPU"); 3840 return false; 3841 } 3842 3843 // For FLAT segment the offset must be positive; 3844 // MSB is ignored and forced to zero. 3845 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3846 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3847 if (!isIntN(OffsetSize, Op.getImm())) { 3848 Error(getFlatOffsetLoc(Operands), 3849 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3850 return false; 3851 } 3852 } else { 3853 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3854 if (!isUIntN(OffsetSize, Op.getImm())) { 3855 Error(getFlatOffsetLoc(Operands), 3856 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3857 return false; 3858 } 3859 } 3860 3861 return true; 3862 } 3863 3864 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3865 // Start with second operand because SMEM Offset cannot be dst or src0. 
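// (Operands[0] is the mnemonic token, so the scan starts at index 2.)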
3866 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3867 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3868 if (Op.isSMEMOffset()) 3869 return Op.getStartLoc(); 3870 } 3871 return getLoc(); 3872 } 3873 3874 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3875 const OperandVector &Operands) { 3876 if (isCI() || isSI()) 3877 return true; 3878 3879 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3880 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3881 return true; 3882 3883 auto Opcode = Inst.getOpcode(); 3884 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3885 if (OpNum == -1) 3886 return true; 3887 3888 const auto &Op = Inst.getOperand(OpNum); 3889 if (!Op.isImm()) 3890 return true; 3891 3892 uint64_t Offset = Op.getImm(); 3893 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3894 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3895 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3896 return true; 3897 3898 Error(getSMEMOffsetLoc(Operands), 3899 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3900 "expected a 21-bit signed offset"); 3901 3902 return false; 3903 } 3904 3905 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3906 unsigned Opcode = Inst.getOpcode(); 3907 const MCInstrDesc &Desc = MII.get(Opcode); 3908 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3909 return true; 3910 3911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3913 3914 const int OpIndices[] = { Src0Idx, Src1Idx }; 3915 3916 unsigned NumExprs = 0; 3917 unsigned NumLiterals = 0; 3918 uint32_t LiteralValue; 3919 3920 for (int OpIdx : OpIndices) { 3921 if (OpIdx == -1) break; 3922 3923 const MCOperand &MO = Inst.getOperand(OpIdx); 3924 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3925 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3926 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3927 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3928 if (NumLiterals == 0 || LiteralValue != Value) { 3929 LiteralValue = Value; 3930 ++NumLiterals; 3931 } 3932 } else if (MO.isExpr()) { 3933 ++NumExprs; 3934 } 3935 } 3936 } 3937 3938 return NumLiterals + NumExprs <= 1; 3939 } 3940 3941 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3942 const unsigned Opc = Inst.getOpcode(); 3943 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3944 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3945 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3946 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3947 3948 if (OpSel & ~3) 3949 return false; 3950 } 3951 return true; 3952 } 3953 3954 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3955 const OperandVector &Operands) { 3956 const unsigned Opc = Inst.getOpcode(); 3957 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3958 if (DppCtrlIdx < 0) 3959 return true; 3960 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3961 3962 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3963 // DPP64 is supported for row_newbcast only. 
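// A 64-bit operand is detected by src0 having a sub1 subregister; only such
// operands are restricted to the row_newbcast control values.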
3964 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3965 if (Src0Idx >= 0 && 3966 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3967 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3968 Error(S, "64 bit dpp only supports row_newbcast"); 3969 return false; 3970 } 3971 } 3972 3973 return true; 3974 } 3975 3976 // Check if VCC register matches wavefront size 3977 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3978 auto FB = getFeatureBits(); 3979 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3980 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3981 } 3982 3983 // VOP3 literal is only allowed in GFX10+ and only one can be used 3984 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3985 const OperandVector &Operands) { 3986 unsigned Opcode = Inst.getOpcode(); 3987 const MCInstrDesc &Desc = MII.get(Opcode); 3988 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3989 return true; 3990 3991 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3992 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3993 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3994 3995 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3996 3997 unsigned NumExprs = 0; 3998 unsigned NumLiterals = 0; 3999 uint32_t LiteralValue; 4000 4001 for (int OpIdx : OpIndices) { 4002 if (OpIdx == -1) break; 4003 4004 const MCOperand &MO = Inst.getOperand(OpIdx); 4005 if (!MO.isImm() && !MO.isExpr()) 4006 continue; 4007 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4008 continue; 4009 4010 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4011 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4012 Error(getConstLoc(Operands), 4013 "inline constants are not allowed for this operand"); 4014 return false; 4015 } 4016 4017 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4018 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4019 if (NumLiterals == 0 || LiteralValue != Value) { 4020 LiteralValue = Value; 4021 ++NumLiterals; 4022 } 4023 } else if (MO.isExpr()) { 4024 ++NumExprs; 4025 } 4026 } 4027 NumLiterals += NumExprs; 4028 4029 if (!NumLiterals) 4030 return true; 4031 4032 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4033 Error(getLitLoc(Operands), "literal operands are not supported"); 4034 return false; 4035 } 4036 4037 if (NumLiterals > 1) { 4038 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4039 return false; 4040 } 4041 4042 return true; 4043 } 4044 4045 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4046 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4047 const MCRegisterInfo *MRI) { 4048 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4049 if (OpIdx < 0) 4050 return -1; 4051 4052 const MCOperand &Op = Inst.getOperand(OpIdx); 4053 if (!Op.isReg()) 4054 return -1; 4055 4056 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4057 auto Reg = Sub ? Sub : Op.getReg(); 4058 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4059 return AGPR32.contains(Reg) ? 
1 : 0; 4060 } 4061 4062 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4063 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4064 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4065 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4066 SIInstrFlags::DS)) == 0) 4067 return true; 4068 4069 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4070 : AMDGPU::OpName::vdata; 4071 4072 const MCRegisterInfo *MRI = getMRI(); 4073 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4074 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4075 4076 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4077 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4078 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4079 return false; 4080 } 4081 4082 auto FB = getFeatureBits(); 4083 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4084 if (DataAreg < 0 || DstAreg < 0) 4085 return true; 4086 return DstAreg == DataAreg; 4087 } 4088 4089 return DstAreg < 1 && DataAreg < 1; 4090 } 4091 4092 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4093 auto FB = getFeatureBits(); 4094 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4095 return true; 4096 4097 const MCRegisterInfo *MRI = getMRI(); 4098 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4099 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4100 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4101 const MCOperand &Op = Inst.getOperand(I); 4102 if (!Op.isReg()) 4103 continue; 4104 4105 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4106 if (!Sub) 4107 continue; 4108 4109 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4110 return false; 4111 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4112 return false; 4113 } 4114 4115 return true; 4116 } 4117 4118 // gfx90a has an undocumented limitation: 4119 // DS_GWS opcodes must use even aligned registers. 4120 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4121 const OperandVector &Operands) { 4122 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4123 return true; 4124 4125 int Opc = Inst.getOpcode(); 4126 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4127 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4128 return true; 4129 4130 const MCRegisterInfo *MRI = getMRI(); 4131 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4132 int Data0Pos = 4133 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4134 assert(Data0Pos != -1); 4135 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4136 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4137 if (RegIdx & 1) { 4138 SMLoc RegLoc = getRegLoc(Reg, Operands); 4139 Error(RegLoc, "vgpr must be even aligned"); 4140 return false; 4141 } 4142 4143 return true; 4144 } 4145 4146 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4147 const OperandVector &Operands, 4148 const SMLoc &IDLoc) { 4149 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4150 AMDGPU::OpName::cpol); 4151 if (CPolPos == -1) 4152 return true; 4153 4154 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4155 4156 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4157 if ((TSFlags & (SIInstrFlags::SMRD)) && 4158 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4159 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4160 return false; 4161 } 4162 4163 if (isGFX90A() && (CPol & CPol::SCC)) { 4164 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4165 StringRef CStr(S.getPointer()); 4166 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4167 Error(S, "scc is not supported on this GPU"); 4168 return false; 4169 } 4170 4171 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4172 return true; 4173 4174 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4175 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4176 Error(IDLoc, "instruction must use glc"); 4177 return false; 4178 } 4179 } else { 4180 if (CPol & CPol::GLC) { 4181 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4182 StringRef CStr(S.getPointer()); 4183 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4184 Error(S, "instruction must not use glc"); 4185 return false; 4186 } 4187 } 4188 4189 return true; 4190 } 4191 4192 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4193 const SMLoc &IDLoc, 4194 const OperandVector &Operands) { 4195 if (auto ErrMsg = validateLdsDirect(Inst)) { 4196 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4197 return false; 4198 } 4199 if (!validateSOPLiteral(Inst)) { 4200 Error(getLitLoc(Operands), 4201 "only one literal operand is allowed"); 4202 return false; 4203 } 4204 if (!validateVOP3Literal(Inst, Operands)) { 4205 return false; 4206 } 4207 if (!validateConstantBusLimitations(Inst, Operands)) { 4208 return false; 4209 } 4210 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4211 return false; 4212 } 4213 if (!validateIntClampSupported(Inst)) { 4214 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4215 "integer clamping is not supported on this GPU"); 4216 return false; 4217 } 4218 if (!validateOpSel(Inst)) { 4219 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4220 "invalid op_sel operand"); 4221 return false; 4222 } 4223 if (!validateDPP(Inst, Operands)) { 4224 return false; 4225 } 4226 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
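// For MIMG, d16 is a separate operand; validateMIMGD16() rejects it on SI/CI
// where it is not supported.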
4227 if (!validateMIMGD16(Inst)) { 4228 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4229 "d16 modifier is not supported on this GPU"); 4230 return false; 4231 } 4232 if (!validateMIMGDim(Inst)) { 4233 Error(IDLoc, "dim modifier is required on this GPU"); 4234 return false; 4235 } 4236 if (!validateMIMGMSAA(Inst)) { 4237 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4238 "invalid dim; must be MSAA type"); 4239 return false; 4240 } 4241 if (!validateMIMGDataSize(Inst)) { 4242 Error(IDLoc, 4243 "image data size does not match dmask and tfe"); 4244 return false; 4245 } 4246 if (!validateMIMGAddrSize(Inst)) { 4247 Error(IDLoc, 4248 "image address size does not match dim and a16"); 4249 return false; 4250 } 4251 if (!validateMIMGAtomicDMask(Inst)) { 4252 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4253 "invalid atomic image dmask"); 4254 return false; 4255 } 4256 if (!validateMIMGGatherDMask(Inst)) { 4257 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4258 "invalid image_gather dmask: only one bit must be set"); 4259 return false; 4260 } 4261 if (!validateMovrels(Inst, Operands)) { 4262 return false; 4263 } 4264 if (!validateFlatOffset(Inst, Operands)) { 4265 return false; 4266 } 4267 if (!validateSMEMOffset(Inst, Operands)) { 4268 return false; 4269 } 4270 if (!validateMAIAccWrite(Inst, Operands)) { 4271 return false; 4272 } 4273 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4274 return false; 4275 } 4276 4277 if (!validateAGPRLdSt(Inst)) { 4278 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4279 ? "invalid register class: data and dst should be all VGPR or AGPR" 4280 : "invalid register class: agpr loads and stores not supported on this GPU" 4281 ); 4282 return false; 4283 } 4284 if (!validateVGPRAlign(Inst)) { 4285 Error(IDLoc, 4286 "invalid register class: vgpr tuples must be 64 bit aligned"); 4287 return false; 4288 } 4289 if (!validateGWS(Inst, Operands)) { 4290 return false; 4291 } 4292 4293 if (!validateDivScale(Inst)) { 4294 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4295 return false; 4296 } 4300 4301 return true; 4302 } 4303 4304 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4305 const FeatureBitset &FBS, 4306 unsigned VariantID = 0); 4307 4308 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4309 const FeatureBitset &AvailableFeatures, 4310 unsigned VariantID); 4311 4312 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4313 const FeatureBitset &FBS) { 4314 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4315 } 4316 4317 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4318 const FeatureBitset &FBS, 4319 ArrayRef<unsigned> Variants) { 4320 for (auto Variant : Variants) { 4321 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4322 return true; 4323 } 4324 4325 return false; 4326 } 4327 4328 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4329 const SMLoc &IDLoc) { 4330 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4331 4332 // Check if requested instruction variant is supported. 4333 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4334 return false; 4335 4336 // This instruction is not supported. 4337 // Clear any other pending errors because they are no longer relevant. 4338 getParser().clearPendingErrors(); 4339 4340 // Requested instruction variant is not supported. 4341 // Check if any other variants are supported.
4342 StringRef VariantName = getMatchedVariantName(); 4343 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4344 return Error(IDLoc, 4345 Twine(VariantName, 4346 " variant of this instruction is not supported")); 4347 } 4348 4349 // Finally check if this instruction is supported on any other GPU. 4350 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4351 return Error(IDLoc, "instruction not supported on this GPU"); 4352 } 4353 4354 // Instruction not supported on any GPU. Probably a typo. 4355 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4356 return Error(IDLoc, "invalid instruction" + Suggestion); 4357 } 4358 4359 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4360 OperandVector &Operands, 4361 MCStreamer &Out, 4362 uint64_t &ErrorInfo, 4363 bool MatchingInlineAsm) { 4364 MCInst Inst; 4365 unsigned Result = Match_Success; 4366 for (auto Variant : getMatchedVariants()) { 4367 uint64_t EI; 4368 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4369 Variant); 4370 // We order match statuses from least to most specific, and keep the most 4371 // specific status seen so far as the overall result: 4372 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4373 if ((R == Match_Success) || 4374 (R == Match_PreferE32) || 4375 (R == Match_MissingFeature && Result != Match_PreferE32) || 4376 (R == Match_InvalidOperand && Result != Match_MissingFeature 4377 && Result != Match_PreferE32) || 4378 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4379 && Result != Match_MissingFeature 4380 && Result != Match_PreferE32)) { 4381 Result = R; 4382 ErrorInfo = EI; 4383 } 4384 if (R == Match_Success) 4385 break; 4386 } 4387 4388 if (Result == Match_Success) { 4389 if (!validateInstruction(Inst, IDLoc, Operands)) { 4390 return true; 4391 } 4392 Inst.setLoc(IDLoc); 4393 Out.emitInstruction(Inst, getSTI()); 4394 return false; 4395 } 4396 4397 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4398 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4399 return true; 4400 } 4401 4402 switch (Result) { 4403 default: break; 4404 case Match_MissingFeature: 4405 // It has been verified that the specified instruction 4406 // mnemonic is valid. A match was found but it requires 4407 // features which are not supported on this GPU.
4408 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4409 4410 case Match_InvalidOperand: { 4411 SMLoc ErrorLoc = IDLoc; 4412 if (ErrorInfo != ~0ULL) { 4413 if (ErrorInfo >= Operands.size()) { 4414 return Error(IDLoc, "too few operands for instruction"); 4415 } 4416 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4417 if (ErrorLoc == SMLoc()) 4418 ErrorLoc = IDLoc; 4419 } 4420 return Error(ErrorLoc, "invalid operand for instruction"); 4421 } 4422 4423 case Match_PreferE32: 4424 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4425 "should be encoded as e32"); 4426 case Match_MnemonicFail: 4427 llvm_unreachable("Invalid instructions should have been handled already"); 4428 } 4429 llvm_unreachable("Implement any new match types added!"); 4430 } 4431 4432 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4433 int64_t Tmp = -1; 4434 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4435 return true; 4436 } 4437 if (getParser().parseAbsoluteExpression(Tmp)) { 4438 return true; 4439 } 4440 Ret = static_cast<uint32_t>(Tmp); 4441 return false; 4442 } 4443 4444 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4445 uint32_t &Minor) { 4446 if (ParseAsAbsoluteExpression(Major)) 4447 return TokError("invalid major version"); 4448 4449 if (!trySkipToken(AsmToken::Comma)) 4450 return TokError("minor version number required, comma expected"); 4451 4452 if (ParseAsAbsoluteExpression(Minor)) 4453 return TokError("invalid minor version"); 4454 4455 return false; 4456 } 4457 4458 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4459 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4460 return TokError("directive only supported for amdgcn architecture"); 4461 4462 std::string TargetIDDirective; 4463 SMLoc TargetStart = getTok().getLoc(); 4464 if (getParser().parseEscapedString(TargetIDDirective)) 4465 return true; 4466 4467 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4468 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4469 return getParser().Error(TargetRange.Start, 4470 (Twine(".amdgcn_target directive's target id ") + 4471 Twine(TargetIDDirective) + 4472 Twine(" does not match the specified target id ") + 4473 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4474 4475 return false; 4476 } 4477 4478 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4479 return Error(Range.Start, "value out of range", Range); 4480 } 4481 4482 bool AMDGPUAsmParser::calculateGPRBlocks( 4483 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4484 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4485 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4486 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4487 // TODO(scott.linder): These calculations are duplicated from 4488 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
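// On GFX10+ the granulated SGPR count is not programmed, so SGPRs are
// reported as zero; on older targets the extra SGPRs used by VCC, flat
// scratch and XNACK are added before converting to allocation granules.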
4489 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4490 4491 unsigned NumVGPRs = NextFreeVGPR; 4492 unsigned NumSGPRs = NextFreeSGPR; 4493 4494 if (Version.Major >= 10) 4495 NumSGPRs = 0; 4496 else { 4497 unsigned MaxAddressableNumSGPRs = 4498 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4499 4500 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4501 NumSGPRs > MaxAddressableNumSGPRs) 4502 return OutOfRangeError(SGPRRange); 4503 4504 NumSGPRs += 4505 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4506 4507 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4508 NumSGPRs > MaxAddressableNumSGPRs) 4509 return OutOfRangeError(SGPRRange); 4510 4511 if (Features.test(FeatureSGPRInitBug)) 4512 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4513 } 4514 4515 VGPRBlocks = 4516 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4517 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4518 4519 return false; 4520 } 4521 4522 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4523 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4524 return TokError("directive only supported for amdgcn architecture"); 4525 4526 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4527 return TokError("directive only supported for amdhsa OS"); 4528 4529 StringRef KernelName; 4530 if (getParser().parseIdentifier(KernelName)) 4531 return true; 4532 4533 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4534 4535 StringSet<> Seen; 4536 4537 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4538 4539 SMRange VGPRRange; 4540 uint64_t NextFreeVGPR = 0; 4541 uint64_t AccumOffset = 0; 4542 SMRange SGPRRange; 4543 uint64_t NextFreeSGPR = 0; 4544 unsigned UserSGPRCount = 0; 4545 bool ReserveVCC = true; 4546 bool ReserveFlatScr = true; 4547 Optional<bool> EnableWavefrontSize32; 4548 4549 while (true) { 4550 while (trySkipToken(AsmToken::EndOfStatement)); 4551 4552 StringRef ID; 4553 SMRange IDRange = getTok().getLocRange(); 4554 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4555 return true; 4556 4557 if (ID == ".end_amdhsa_kernel") 4558 break; 4559 4560 if (Seen.find(ID) != Seen.end()) 4561 return TokError(".amdhsa_ directives cannot be repeated"); 4562 Seen.insert(ID); 4563 4564 SMLoc ValStart = getLoc(); 4565 int64_t IVal; 4566 if (getParser().parseAbsoluteExpression(IVal)) 4567 return true; 4568 SMLoc ValEnd = getLoc(); 4569 SMRange ValRange = SMRange(ValStart, ValEnd); 4570 4571 if (IVal < 0) 4572 return OutOfRangeError(ValRange); 4573 4574 uint64_t Val = IVal; 4575 4576 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4577 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4578 return OutOfRangeError(RANGE); \ 4579 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4580 4581 if (ID == ".amdhsa_group_segment_fixed_size") { 4582 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4583 return OutOfRangeError(ValRange); 4584 KD.group_segment_fixed_size = Val; 4585 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4586 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4587 return OutOfRangeError(ValRange); 4588 KD.private_segment_fixed_size = Val; 4589 } else if (ID == ".amdhsa_kernarg_size") { 4590 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4591 return OutOfRangeError(ValRange); 4592 KD.kernarg_size = Val; 4593 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4594 if (hasArchitectedFlatScratch()) 4595 return 
Error(IDRange.Start, 4596 "directive is not supported with architected flat scratch", 4597 IDRange); 4598 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4599 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4600 Val, ValRange); 4601 if (Val) 4602 UserSGPRCount += 4; 4603 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4604 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4605 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4606 ValRange); 4607 if (Val) 4608 UserSGPRCount += 2; 4609 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4610 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4611 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4612 ValRange); 4613 if (Val) 4614 UserSGPRCount += 2; 4615 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4616 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4617 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4618 Val, ValRange); 4619 if (Val) 4620 UserSGPRCount += 2; 4621 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4622 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4623 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4624 ValRange); 4625 if (Val) 4626 UserSGPRCount += 2; 4627 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4628 if (hasArchitectedFlatScratch()) 4629 return Error(IDRange.Start, 4630 "directive is not supported with architected flat scratch", 4631 IDRange); 4632 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4633 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4634 ValRange); 4635 if (Val) 4636 UserSGPRCount += 2; 4637 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4638 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4639 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4640 Val, ValRange); 4641 if (Val) 4642 UserSGPRCount += 1; 4643 } else if (ID == ".amdhsa_wavefront_size32") { 4644 if (IVersion.Major < 10) 4645 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4646 EnableWavefrontSize32 = Val; 4647 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4648 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4649 Val, ValRange); 4650 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4651 if (hasArchitectedFlatScratch()) 4652 return Error(IDRange.Start, 4653 "directive is not supported with architected flat scratch", 4654 IDRange); 4655 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4656 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4657 } else if (ID == ".amdhsa_enable_private_segment") { 4658 if (!hasArchitectedFlatScratch()) 4659 return Error( 4660 IDRange.Start, 4661 "directive is not supported without architected flat scratch", 4662 IDRange); 4663 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4664 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4665 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4666 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4667 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4668 ValRange); 4669 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4670 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4671 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4672 ValRange); 4673 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4674 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4675 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4676 ValRange); 4677 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4678 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4679 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4680 ValRange); 4681 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4682 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4683 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4684 ValRange); 4685 } else if (ID == ".amdhsa_next_free_vgpr") { 4686 VGPRRange = ValRange; 4687 NextFreeVGPR = Val; 4688 } else if (ID == ".amdhsa_next_free_sgpr") { 4689 SGPRRange = ValRange; 4690 NextFreeSGPR = Val; 4691 } else if (ID == ".amdhsa_accum_offset") { 4692 if (!isGFX90A()) 4693 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4694 AccumOffset = Val; 4695 } else if (ID == ".amdhsa_reserve_vcc") { 4696 if (!isUInt<1>(Val)) 4697 return OutOfRangeError(ValRange); 4698 ReserveVCC = Val; 4699 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4700 if (IVersion.Major < 7) 4701 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4702 if (hasArchitectedFlatScratch()) 4703 return Error(IDRange.Start, 4704 "directive is not supported with architected flat scratch", 4705 IDRange); 4706 if (!isUInt<1>(Val)) 4707 return OutOfRangeError(ValRange); 4708 ReserveFlatScr = Val; 4709 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4710 if (IVersion.Major < 8) 4711 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4712 if (!isUInt<1>(Val)) 4713 return OutOfRangeError(ValRange); 4714 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4715 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4716 IDRange); 4717 } else if (ID == ".amdhsa_float_round_mode_32") { 4718 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4719 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4720 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4721 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4722 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4723 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4724 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4725 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4726 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4727 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4728 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4729 ValRange); 4730 } else if (ID == ".amdhsa_dx10_clamp") { 4731 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4732 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4733 } else if (ID == ".amdhsa_ieee_mode") { 4734 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4735 Val, ValRange); 4736 } else if (ID == ".amdhsa_fp16_overflow") { 4737 if (IVersion.Major < 9) 4738 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4739 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4740 ValRange); 4741 } else if (ID == ".amdhsa_tg_split") { 4742 if (!isGFX90A()) 4743 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4744 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4745 ValRange); 4746 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4747 if (IVersion.Major < 10) 4748 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4749 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4750 ValRange); 4751 } else if (ID == ".amdhsa_memory_ordered") { 4752 if (IVersion.Major < 10) 4753 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4754 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4755 ValRange); 4756 } else if (ID == ".amdhsa_forward_progress") { 4757 if (IVersion.Major < 10) 4758 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4759 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4760 ValRange); 4761 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4762 PARSE_BITS_ENTRY( 4763 KD.compute_pgm_rsrc2, 4764 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4765 ValRange); 4766 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4767 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4768 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4769 Val, ValRange); 4770 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4771 PARSE_BITS_ENTRY( 4772 KD.compute_pgm_rsrc2, 4773 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4774 ValRange); 4775 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4776 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4777 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4778 Val, ValRange); 4779 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4780 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4781 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4782 Val, ValRange); 4783 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4784 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4785 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4786 Val, ValRange); 4787 } else if (ID == ".amdhsa_exception_int_div_zero") { 4788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4789 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4790 Val, ValRange); 4791 } else { 4792 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4793 } 4794 4795 #undef PARSE_BITS_ENTRY 4796 } 4797 4798 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4799 return TokError(".amdhsa_next_free_vgpr directive is required"); 4800 4801 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4802 return TokError(".amdhsa_next_free_sgpr directive is required"); 4803 4804 unsigned VGPRBlocks; 4805 unsigned SGPRBlocks; 4806 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4807 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4808 EnableWavefrontSize32, NextFreeVGPR, 4809 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4810 SGPRBlocks)) 4811 return true; 4812 4813 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4814 VGPRBlocks)) 4815 return OutOfRangeError(VGPRRange); 4816 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4817 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4818 4819 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4820 SGPRBlocks)) 4821 return OutOfRangeError(SGPRRange); 4822 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4823 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4824 SGPRBlocks); 4825 4826 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4827 return TokError("too many user SGPRs enabled"); 4828 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4829 UserSGPRCount); 4830 4831 if (isGFX90A()) { 4832 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4833 return TokError(".amdhsa_accum_offset directive is required"); 4834 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4835 return TokError("accum_offset should be in range [4..256] in " 4836 "increments of 4"); 4837 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4838 return TokError("accum_offset exceeds total VGPR allocation"); 4839 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4840 (AccumOffset / 4 - 1)); 4841 } 4842 4843 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4844 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, 
ReserveVCC, 4845 ReserveFlatScr); 4846 return false; 4847 } 4848 4849 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4850 uint32_t Major; 4851 uint32_t Minor; 4852 4853 if (ParseDirectiveMajorMinor(Major, Minor)) 4854 return true; 4855 4856 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4857 return false; 4858 } 4859 4860 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4861 uint32_t Major; 4862 uint32_t Minor; 4863 uint32_t Stepping; 4864 StringRef VendorName; 4865 StringRef ArchName; 4866 4867 // If this directive has no arguments, then use the ISA version for the 4868 // targeted GPU. 4869 if (isToken(AsmToken::EndOfStatement)) { 4870 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4871 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4872 ISA.Stepping, 4873 "AMD", "AMDGPU"); 4874 return false; 4875 } 4876 4877 if (ParseDirectiveMajorMinor(Major, Minor)) 4878 return true; 4879 4880 if (!trySkipToken(AsmToken::Comma)) 4881 return TokError("stepping version number required, comma expected"); 4882 4883 if (ParseAsAbsoluteExpression(Stepping)) 4884 return TokError("invalid stepping version"); 4885 4886 if (!trySkipToken(AsmToken::Comma)) 4887 return TokError("vendor name required, comma expected"); 4888 4889 if (!parseString(VendorName, "invalid vendor name")) 4890 return true; 4891 4892 if (!trySkipToken(AsmToken::Comma)) 4893 return TokError("arch name required, comma expected"); 4894 4895 if (!parseString(ArchName, "invalid arch name")) 4896 return true; 4897 4898 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4899 VendorName, ArchName); 4900 return false; 4901 } 4902 4903 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4904 amd_kernel_code_t &Header) { 4905 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4906 // assembly for backwards compatibility. 
4907 if (ID == "max_scratch_backing_memory_byte_size") { 4908 Parser.eatToEndOfStatement(); 4909 return false; 4910 } 4911 4912 SmallString<40> ErrStr; 4913 raw_svector_ostream Err(ErrStr); 4914 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4915 return TokError(Err.str()); 4916 } 4917 Lex(); 4918 4919 if (ID == "enable_wavefront_size32") { 4920 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4921 if (!isGFX10Plus()) 4922 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4923 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4924 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4925 } else { 4926 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4927 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4928 } 4929 } 4930 4931 if (ID == "wavefront_size") { 4932 if (Header.wavefront_size == 5) { 4933 if (!isGFX10Plus()) 4934 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4935 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4936 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4937 } else if (Header.wavefront_size == 6) { 4938 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4939 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4940 } 4941 } 4942 4943 if (ID == "enable_wgp_mode") { 4944 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4945 !isGFX10Plus()) 4946 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4947 } 4948 4949 if (ID == "enable_mem_ordered") { 4950 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4951 !isGFX10Plus()) 4952 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4953 } 4954 4955 if (ID == "enable_fwd_progress") { 4956 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4957 !isGFX10Plus()) 4958 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4959 } 4960 4961 return false; 4962 } 4963 4964 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4965 amd_kernel_code_t Header; 4966 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4967 4968 while (true) { 4969 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4970 // will set the current token to EndOfStatement. 
4971 while(trySkipToken(AsmToken::EndOfStatement)); 4972 4973 StringRef ID; 4974 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4975 return true; 4976 4977 if (ID == ".end_amd_kernel_code_t") 4978 break; 4979 4980 if (ParseAMDKernelCodeTValue(ID, Header)) 4981 return true; 4982 } 4983 4984 getTargetStreamer().EmitAMDKernelCodeT(Header); 4985 4986 return false; 4987 } 4988 4989 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4990 StringRef KernelName; 4991 if (!parseId(KernelName, "expected symbol name")) 4992 return true; 4993 4994 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4995 ELF::STT_AMDGPU_HSA_KERNEL); 4996 4997 KernelScope.initialize(getContext()); 4998 return false; 4999 } 5000 5001 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5002 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5003 return Error(getLoc(), 5004 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5005 "architectures"); 5006 } 5007 5008 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5009 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5010 return Error(getParser().getTok().getLoc(), "target id must match options"); 5011 5012 getTargetStreamer().EmitISAVersion(); 5013 Lex(); 5014 5015 return false; 5016 } 5017 5018 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5019 const char *AssemblerDirectiveBegin; 5020 const char *AssemblerDirectiveEnd; 5021 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5022 isHsaAbiVersion3Or4(&getSTI()) 5023 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5024 HSAMD::V3::AssemblerDirectiveEnd) 5025 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5026 HSAMD::AssemblerDirectiveEnd); 5027 5028 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5029 return Error(getLoc(), 5030 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5031 "not available on non-amdhsa OSes")).str()); 5032 } 5033 5034 std::string HSAMetadataString; 5035 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5036 HSAMetadataString)) 5037 return true; 5038 5039 if (isHsaAbiVersion3Or4(&getSTI())) { 5040 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5041 return Error(getLoc(), "invalid HSA metadata"); 5042 } else { 5043 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5044 return Error(getLoc(), "invalid HSA metadata"); 5045 } 5046 5047 return false; 5048 } 5049 5050 /// Common code to parse out a block of text (typically YAML) between start and 5051 /// end directives. 
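/// Whitespace is preserved and statements are re-joined with the target's
/// statement separator so the collected text can be handed to the metadata
/// parsers unchanged.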
5052 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5053 const char *AssemblerDirectiveEnd, 5054 std::string &CollectString) { 5055 5056 raw_string_ostream CollectStream(CollectString); 5057 5058 getLexer().setSkipSpace(false); 5059 5060 bool FoundEnd = false; 5061 while (!isToken(AsmToken::Eof)) { 5062 while (isToken(AsmToken::Space)) { 5063 CollectStream << getTokenStr(); 5064 Lex(); 5065 } 5066 5067 if (trySkipId(AssemblerDirectiveEnd)) { 5068 FoundEnd = true; 5069 break; 5070 } 5071 5072 CollectStream << Parser.parseStringToEndOfStatement() 5073 << getContext().getAsmInfo()->getSeparatorString(); 5074 5075 Parser.eatToEndOfStatement(); 5076 } 5077 5078 getLexer().setSkipSpace(true); 5079 5080 if (isToken(AsmToken::Eof) && !FoundEnd) { 5081 return TokError(Twine("expected directive ") + 5082 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5083 } 5084 5085 CollectStream.flush(); 5086 return false; 5087 } 5088 5089 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5090 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5091 std::string String; 5092 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5093 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5094 return true; 5095 5096 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5097 if (!PALMetadata->setFromString(String)) 5098 return Error(getLoc(), "invalid PAL metadata"); 5099 return false; 5100 } 5101 5102 /// Parse the assembler directive for old linear-format PAL metadata. 5103 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5104 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5105 return Error(getLoc(), 5106 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5107 "not available on non-amdpal OSes")).str()); 5108 } 5109 5110 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5111 PALMetadata->setLegacy(); 5112 for (;;) { 5113 uint32_t Key, Value; 5114 if (ParseAsAbsoluteExpression(Key)) { 5115 return TokError(Twine("invalid value in ") + 5116 Twine(PALMD::AssemblerDirective)); 5117 } 5118 if (!trySkipToken(AsmToken::Comma)) { 5119 return TokError(Twine("expected an even number of values in ") + 5120 Twine(PALMD::AssemblerDirective)); 5121 } 5122 if (ParseAsAbsoluteExpression(Value)) { 5123 return TokError(Twine("invalid value in ") + 5124 Twine(PALMD::AssemblerDirective)); 5125 } 5126 PALMetadata->setRegister(Key, Value); 5127 if (!trySkipToken(AsmToken::Comma)) 5128 break; 5129 } 5130 return false; 5131 } 5132 5133 /// ParseDirectiveAMDGPULDS 5134 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5135 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5136 if (getParser().checkForValidSection()) 5137 return true; 5138 5139 StringRef Name; 5140 SMLoc NameLoc = getLoc(); 5141 if (getParser().parseIdentifier(Name)) 5142 return TokError("expected identifier in directive"); 5143 5144 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5145 if (parseToken(AsmToken::Comma, "expected ','")) 5146 return true; 5147 5148 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5149 5150 int64_t Size; 5151 SMLoc SizeLoc = getLoc(); 5152 if (getParser().parseAbsoluteExpression(Size)) 5153 return true; 5154 if (Size < 0) 5155 return Error(SizeLoc, "size must be non-negative"); 5156 if (Size > LocalMemorySize) 5157 return Error(SizeLoc, "size is too large"); 5158 5159 int64_t Alignment = 4; 5160 if (trySkipToken(AsmToken::Comma)) { 5161 SMLoc AlignLoc = getLoc(); 5162 if 
(getParser().parseAbsoluteExpression(Alignment)) 5163 return true; 5164 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5165 return Error(AlignLoc, "alignment must be a power of two"); 5166 5167 // Alignment larger than the size of LDS is possible in theory, as long 5168 // as the linker manages to place the symbol at address 0, but we do want 5169 // to make sure the alignment fits nicely into a 32-bit integer. 5170 if (Alignment >= 1u << 31) 5171 return Error(AlignLoc, "alignment is too large"); 5172 } 5173 5174 if (parseToken(AsmToken::EndOfStatement, 5175 "unexpected token in '.amdgpu_lds' directive")) 5176 return true; 5177 5178 Symbol->redefineIfPossible(); 5179 if (!Symbol->isUndefined()) 5180 return Error(NameLoc, "invalid symbol redefinition"); 5181 5182 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5183 return false; 5184 } 5185 5186 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5187 StringRef IDVal = DirectiveID.getString(); 5188 5189 if (isHsaAbiVersion3Or4(&getSTI())) { 5190 if (IDVal == ".amdhsa_kernel") 5191 return ParseDirectiveAMDHSAKernel(); 5192 5193 // TODO: Restructure/combine with PAL metadata directive. 5194 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5195 return ParseDirectiveHSAMetadata(); 5196 } else { 5197 if (IDVal == ".hsa_code_object_version") 5198 return ParseDirectiveHSACodeObjectVersion(); 5199 5200 if (IDVal == ".hsa_code_object_isa") 5201 return ParseDirectiveHSACodeObjectISA(); 5202 5203 if (IDVal == ".amd_kernel_code_t") 5204 return ParseDirectiveAMDKernelCodeT(); 5205 5206 if (IDVal == ".amdgpu_hsa_kernel") 5207 return ParseDirectiveAMDGPUHsaKernel(); 5208 5209 if (IDVal == ".amd_amdgpu_isa") 5210 return ParseDirectiveISAVersion(); 5211 5212 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5213 return ParseDirectiveHSAMetadata(); 5214 } 5215 5216 if (IDVal == ".amdgcn_target") 5217 return ParseDirectiveAMDGCNTarget(); 5218 5219 if (IDVal == ".amdgpu_lds") 5220 return ParseDirectiveAMDGPULDS(); 5221 5222 if (IDVal == PALMD::AssemblerDirectiveBegin) 5223 return ParseDirectivePALMetadataBegin(); 5224 5225 if (IDVal == PALMD::AssemblerDirective) 5226 return ParseDirectivePALMetadata(); 5227 5228 return true; 5229 } 5230 5231 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5232 unsigned RegNo) { 5233 5234 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5235 R.isValid(); ++R) { 5236 if (*R == RegNo) 5237 return isGFX9Plus(); 5238 } 5239 5240 // GFX10 has 2 more SGPRs, 104 and 105. 5241 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5242 R.isValid(); ++R) { 5243 if (*R == RegNo) 5244 return hasSGPR104_SGPR105(); 5245 } 5246 5247 switch (RegNo) { 5248 case AMDGPU::SRC_SHARED_BASE: 5249 case AMDGPU::SRC_SHARED_LIMIT: 5250 case AMDGPU::SRC_PRIVATE_BASE: 5251 case AMDGPU::SRC_PRIVATE_LIMIT: 5252 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5253 return isGFX9Plus(); 5254 case AMDGPU::TBA: 5255 case AMDGPU::TBA_LO: 5256 case AMDGPU::TBA_HI: 5257 case AMDGPU::TMA: 5258 case AMDGPU::TMA_LO: 5259 case AMDGPU::TMA_HI: 5260 return !isGFX9Plus(); 5261 case AMDGPU::XNACK_MASK: 5262 case AMDGPU::XNACK_MASK_LO: 5263 case AMDGPU::XNACK_MASK_HI: 5264 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5265 case AMDGPU::SGPR_NULL: 5266 return isGFX10Plus(); 5267 default: 5268 break; 5269 } 5270 5271 if (isCI()) 5272 return true; 5273 5274 if (isSI() || isGFX10Plus()) { 5275 // No flat_scr on SI.
5276 // On GFX10 flat scratch is not a valid register operand and can only be 5277 // accessed with s_setreg/s_getreg. 5278 switch (RegNo) { 5279 case AMDGPU::FLAT_SCR: 5280 case AMDGPU::FLAT_SCR_LO: 5281 case AMDGPU::FLAT_SCR_HI: 5282 return false; 5283 default: 5284 return true; 5285 } 5286 } 5287 5288 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5289 // SI/CI have. 5290 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5291 R.isValid(); ++R) { 5292 if (*R == RegNo) 5293 return hasSGPR102_SGPR103(); 5294 } 5295 5296 return true; 5297 } 5298 5299 OperandMatchResultTy 5300 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5301 OperandMode Mode) { 5302 // Try to parse with a custom parser 5303 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5304 5305 // If we successfully parsed the operand or if there was an error parsing, 5306 // we are done. 5307 // 5308 // If we are parsing after we reach EndOfStatement then this means we 5309 // are appending default values to the Operands list. This is only done 5310 // by a custom parser, so we shouldn't continue on to the generic parsing. 5311 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5312 isToken(AsmToken::EndOfStatement)) 5313 return ResTy; 5314 5315 SMLoc RBraceLoc; 5316 SMLoc LBraceLoc = getLoc(); 5317 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5318 unsigned Prefix = Operands.size(); 5319 5320 for (;;) { 5321 auto Loc = getLoc(); 5322 ResTy = parseReg(Operands); 5323 if (ResTy == MatchOperand_NoMatch) 5324 Error(Loc, "expected a register"); 5325 if (ResTy != MatchOperand_Success) 5326 return MatchOperand_ParseFail; 5327 5328 RBraceLoc = getLoc(); 5329 if (trySkipToken(AsmToken::RBrac)) 5330 break; 5331 5332 if (!skipToken(AsmToken::Comma, 5333 "expected a comma or a closing square bracket")) { 5334 return MatchOperand_ParseFail; 5335 } 5336 } 5337 5338 if (Operands.size() - Prefix > 1) { 5339 Operands.insert(Operands.begin() + Prefix, 5340 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5341 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5342 } 5343 5344 return MatchOperand_Success; 5345 } 5346 5347 return parseRegOrImm(Operands); 5348 } 5349 5350 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5351 // Clear any forced encodings from the previous instruction.
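// For example, "v_add_f32_e64" forces the 64-bit (VOP3) encoding and has its
// "_e64" suffix stripped before matching; the "_e32", "_dpp" and "_sdwa"
// suffixes are handled the same way below.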
5352 setForcedEncodingSize(0); 5353 setForcedDPP(false); 5354 setForcedSDWA(false); 5355 5356 if (Name.endswith("_e64")) { 5357 setForcedEncodingSize(64); 5358 return Name.substr(0, Name.size() - 4); 5359 } else if (Name.endswith("_e32")) { 5360 setForcedEncodingSize(32); 5361 return Name.substr(0, Name.size() - 4); 5362 } else if (Name.endswith("_dpp")) { 5363 setForcedDPP(true); 5364 return Name.substr(0, Name.size() - 4); 5365 } else if (Name.endswith("_sdwa")) { 5366 setForcedSDWA(true); 5367 return Name.substr(0, Name.size() - 5); 5368 } 5369 return Name; 5370 } 5371 5372 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5373 StringRef Name, 5374 SMLoc NameLoc, OperandVector &Operands) { 5375 // Add the instruction mnemonic 5376 Name = parseMnemonicSuffix(Name); 5377 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5378 5379 bool IsMIMG = Name.startswith("image_"); 5380 5381 while (!trySkipToken(AsmToken::EndOfStatement)) { 5382 OperandMode Mode = OperandMode_Default; 5383 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5384 Mode = OperandMode_NSA; 5385 CPolSeen = 0; 5386 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5387 5388 if (Res != MatchOperand_Success) { 5389 checkUnsupportedInstruction(Name, NameLoc); 5390 if (!Parser.hasPendingError()) { 5391 // FIXME: use real operand location rather than the current location. 5392 StringRef Msg = 5393 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5394 "not a valid operand."; 5395 Error(getLoc(), Msg); 5396 } 5397 while (!trySkipToken(AsmToken::EndOfStatement)) { 5398 lex(); 5399 } 5400 return true; 5401 } 5402 5403 // Eat the comma or space if there is one. 5404 trySkipToken(AsmToken::Comma); 5405 } 5406 5407 return false; 5408 } 5409 5410 //===----------------------------------------------------------------------===// 5411 // Utility functions 5412 //===----------------------------------------------------------------------===// 5413 5414 OperandMatchResultTy 5415 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5416 5417 if (!trySkipId(Prefix, AsmToken::Colon)) 5418 return MatchOperand_NoMatch; 5419 5420 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5421 } 5422 5423 OperandMatchResultTy 5424 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5425 AMDGPUOperand::ImmTy ImmTy, 5426 bool (*ConvertResult)(int64_t&)) { 5427 SMLoc S = getLoc(); 5428 int64_t Value = 0; 5429 5430 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5431 if (Res != MatchOperand_Success) 5432 return Res; 5433 5434 if (ConvertResult && !ConvertResult(Value)) { 5435 Error(S, "invalid " + StringRef(Prefix) + " value."); 5436 } 5437 5438 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5439 return MatchOperand_Success; 5440 } 5441 5442 OperandMatchResultTy 5443 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5444 OperandVector &Operands, 5445 AMDGPUOperand::ImmTy ImmTy, 5446 bool (*ConvertResult)(int64_t&)) { 5447 SMLoc S = getLoc(); 5448 if (!trySkipId(Prefix, AsmToken::Colon)) 5449 return MatchOperand_NoMatch; 5450 5451 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5452 return MatchOperand_ParseFail; 5453 5454 unsigned Val = 0; 5455 const unsigned MaxSize = 4; 5456 5457 // FIXME: How to verify the number of elements matches the number of src 5458 // operands? 
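// For example, an operand written as "op_sel:[0,1,1,0]" is parsed here into a
// 4-bit mask (bit I is set iff element I is 1); each element must be 0 or 1
// and at most four elements are accepted.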
5459 for (int I = 0; ; ++I) { 5460 int64_t Op; 5461 SMLoc Loc = getLoc(); 5462 if (!parseExpr(Op)) 5463 return MatchOperand_ParseFail; 5464 5465 if (Op != 0 && Op != 1) { 5466 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5467 return MatchOperand_ParseFail; 5468 } 5469 5470 Val |= (Op << I); 5471 5472 if (trySkipToken(AsmToken::RBrac)) 5473 break; 5474 5475 if (I + 1 == MaxSize) { 5476 Error(getLoc(), "expected a closing square bracket"); 5477 return MatchOperand_ParseFail; 5478 } 5479 5480 if (!skipToken(AsmToken::Comma, "expected a comma")) 5481 return MatchOperand_ParseFail; 5482 } 5483 5484 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5485 return MatchOperand_Success; 5486 } 5487 5488 OperandMatchResultTy 5489 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5490 AMDGPUOperand::ImmTy ImmTy) { 5491 int64_t Bit; 5492 SMLoc S = getLoc(); 5493 5494 if (trySkipId(Name)) { 5495 Bit = 1; 5496 } else if (trySkipId("no", Name)) { 5497 Bit = 0; 5498 } else { 5499 return MatchOperand_NoMatch; 5500 } 5501 5502 if (Name == "r128" && !hasMIMG_R128()) { 5503 Error(S, "r128 modifier is not supported on this GPU"); 5504 return MatchOperand_ParseFail; 5505 } 5506 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5507 Error(S, "a16 modifier is not supported on this GPU"); 5508 return MatchOperand_ParseFail; 5509 } 5510 5511 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5512 ImmTy = AMDGPUOperand::ImmTyR128A16; 5513 5514 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5515 return MatchOperand_Success; 5516 } 5517 5518 OperandMatchResultTy 5519 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5520 unsigned CPolOn = 0; 5521 unsigned CPolOff = 0; 5522 SMLoc S = getLoc(); 5523 5524 if (trySkipId("glc")) 5525 CPolOn = AMDGPU::CPol::GLC; 5526 else if (trySkipId("noglc")) 5527 CPolOff = AMDGPU::CPol::GLC; 5528 else if (trySkipId("slc")) 5529 CPolOn = AMDGPU::CPol::SLC; 5530 else if (trySkipId("noslc")) 5531 CPolOff = AMDGPU::CPol::SLC; 5532 else if (trySkipId("dlc")) 5533 CPolOn = AMDGPU::CPol::DLC; 5534 else if (trySkipId("nodlc")) 5535 CPolOff = AMDGPU::CPol::DLC; 5536 else if (trySkipId("scc")) 5537 CPolOn = AMDGPU::CPol::SCC; 5538 else if (trySkipId("noscc")) 5539 CPolOff = AMDGPU::CPol::SCC; 5540 else 5541 return MatchOperand_NoMatch; 5542 5543 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5544 Error(S, "dlc modifier is not supported on this GPU"); 5545 return MatchOperand_ParseFail; 5546 } 5547 5548 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5549 Error(S, "scc modifier is not supported on this GPU"); 5550 return MatchOperand_ParseFail; 5551 } 5552 5553 if (CPolSeen & (CPolOn | CPolOff)) { 5554 Error(S, "duplicate cache policy modifier"); 5555 return MatchOperand_ParseFail; 5556 } 5557 5558 CPolSeen |= (CPolOn | CPolOff); 5559 5560 for (unsigned I = 1; I != Operands.size(); ++I) { 5561 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5562 if (Op.isCPol()) { 5563 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5564 return MatchOperand_Success; 5565 } 5566 } 5567 5568 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5569 AMDGPUOperand::ImmTyCPol)); 5570 5571 return MatchOperand_Success; 5572 } 5573 5574 static void addOptionalImmOperand( 5575 MCInst& Inst, const OperandVector& Operands, 5576 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5577 AMDGPUOperand::ImmTy ImmT, 5578 int64_t Default = 0) { 5579 auto i = OptionalIdx.find(ImmT); 5580 if (i != OptionalIdx.end()) { 
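// The operand was written explicitly in the source; emit its parsed value
// rather than the default.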
5581 unsigned Idx = i->second; 5582 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5583 } else { 5584 Inst.addOperand(MCOperand::createImm(Default)); 5585 } 5586 } 5587 5588 OperandMatchResultTy 5589 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5590 StringRef &Value, 5591 SMLoc &StringLoc) { 5592 if (!trySkipId(Prefix, AsmToken::Colon)) 5593 return MatchOperand_NoMatch; 5594 5595 StringLoc = getLoc(); 5596 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5597 : MatchOperand_ParseFail; 5598 } 5599 5600 //===----------------------------------------------------------------------===// 5601 // MTBUF format 5602 //===----------------------------------------------------------------------===// 5603 5604 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5605 int64_t MaxVal, 5606 int64_t &Fmt) { 5607 int64_t Val; 5608 SMLoc Loc = getLoc(); 5609 5610 auto Res = parseIntWithPrefix(Pref, Val); 5611 if (Res == MatchOperand_ParseFail) 5612 return false; 5613 if (Res == MatchOperand_NoMatch) 5614 return true; 5615 5616 if (Val < 0 || Val > MaxVal) { 5617 Error(Loc, Twine("out of range ", StringRef(Pref))); 5618 return false; 5619 } 5620 5621 Fmt = Val; 5622 return true; 5623 } 5624 5625 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5626 // values to live in a joint format operand in the MCInst encoding. 5627 OperandMatchResultTy 5628 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5629 using namespace llvm::AMDGPU::MTBUFFormat; 5630 5631 int64_t Dfmt = DFMT_UNDEF; 5632 int64_t Nfmt = NFMT_UNDEF; 5633 5634 // dfmt and nfmt can appear in either order, and each is optional. 5635 for (int I = 0; I < 2; ++I) { 5636 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5637 return MatchOperand_ParseFail; 5638 5639 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5640 return MatchOperand_ParseFail; 5641 } 5642 // Skip optional comma between dfmt/nfmt 5643 // but guard against 2 commas following each other. 5644 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5645 !peekToken().is(AsmToken::Comma)) { 5646 trySkipToken(AsmToken::Comma); 5647 } 5648 } 5649 5650 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5651 return MatchOperand_NoMatch; 5652 5653 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5654 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5655 5656 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5657 return MatchOperand_Success; 5658 } 5659 5660 OperandMatchResultTy 5661 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5662 using namespace llvm::AMDGPU::MTBUFFormat; 5663 5664 int64_t Fmt = UFMT_UNDEF; 5665 5666 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5667 return MatchOperand_ParseFail; 5668 5669 if (Fmt == UFMT_UNDEF) 5670 return MatchOperand_NoMatch; 5671 5672 Format = Fmt; 5673 return MatchOperand_Success; 5674 } 5675 5676 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5677 int64_t &Nfmt, 5678 StringRef FormatStr, 5679 SMLoc Loc) { 5680 using namespace llvm::AMDGPU::MTBUFFormat; 5681 int64_t Format; 5682 5683 Format = getDfmt(FormatStr); 5684 if (Format != DFMT_UNDEF) { 5685 Dfmt = Format; 5686 return true; 5687 } 5688 5689 Format = getNfmt(FormatStr, getSTI()); 5690 if (Format != NFMT_UNDEF) { 5691 Nfmt = Format; 5692 return true; 5693 } 5694 5695 Error(Loc, "unsupported format"); 5696 return false; 5697 } 5698 5699 OperandMatchResultTy 5700 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5701 SMLoc FormatLoc, 5702 int64_t &Format) { 5703 using namespace llvm::AMDGPU::MTBUFFormat; 5704 5705 int64_t Dfmt = DFMT_UNDEF; 5706 int64_t Nfmt = NFMT_UNDEF; 5707 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5708 return MatchOperand_ParseFail; 5709 5710 if (trySkipToken(AsmToken::Comma)) { 5711 StringRef Str; 5712 SMLoc Loc = getLoc(); 5713 if (!parseId(Str, "expected a format string") || 5714 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5715 return MatchOperand_ParseFail; 5716 } 5717 if (Dfmt == DFMT_UNDEF) { 5718 Error(Loc, "duplicate numeric format"); 5719 return MatchOperand_ParseFail; 5720 } else if (Nfmt == NFMT_UNDEF) { 5721 Error(Loc, "duplicate data format"); 5722 return MatchOperand_ParseFail; 5723 } 5724 } 5725 5726 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5727 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5728 5729 if (isGFX10Plus()) { 5730 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5731 if (Ufmt == UFMT_UNDEF) { 5732 Error(FormatLoc, "unsupported format"); 5733 return MatchOperand_ParseFail; 5734 } 5735 Format = Ufmt; 5736 } else { 5737 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5738 } 5739 5740 return MatchOperand_Success; 5741 } 5742 5743 OperandMatchResultTy 5744 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5745 SMLoc Loc, 5746 int64_t &Format) { 5747 using namespace llvm::AMDGPU::MTBUFFormat; 5748 5749 auto Id = getUnifiedFormat(FormatStr); 5750 if (Id == UFMT_UNDEF) 5751 return MatchOperand_NoMatch; 5752 5753 if (!isGFX10Plus()) { 5754 Error(Loc, "unified format is not supported on this GPU"); 5755 return MatchOperand_ParseFail; 5756 } 5757 5758 Format = Id; 5759 return MatchOperand_Success; 5760 } 5761 5762 OperandMatchResultTy 5763 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5764 using namespace llvm::AMDGPU::MTBUFFormat; 5765 SMLoc Loc = getLoc(); 5766 5767 if (!parseExpr(Format)) 5768 return MatchOperand_ParseFail; 5769 if (!isValidFormatEncoding(Format, getSTI())) { 5770 Error(Loc, "out of range format"); 5771 return MatchOperand_ParseFail; 5772 } 5773 5774 return MatchOperand_Success; 5775 } 5776 5777 OperandMatchResultTy 5778 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5779 using namespace llvm::AMDGPU::MTBUFFormat; 5780 5781 if (!trySkipId("format", AsmToken::Colon)) 5782 return MatchOperand_NoMatch; 5783 5784 if (trySkipToken(AsmToken::LBrac)) { 5785 StringRef FormatStr; 5786 SMLoc Loc = getLoc(); 5787 if (!parseId(FormatStr, "expected a format string")) 5788 return MatchOperand_ParseFail; 5789 5790 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5791 if (Res == MatchOperand_NoMatch) 5792 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5793 if (Res != MatchOperand_Success) 5794 return Res; 5795 5796 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5797 return MatchOperand_ParseFail; 5798 5799 return MatchOperand_Success; 5800 } 5801 5802 return parseNumericFormat(Format); 5803 } 5804 5805 OperandMatchResultTy 5806 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5807 using namespace llvm::AMDGPU::MTBUFFormat; 5808 5809 int64_t Format = getDefaultFormatEncoding(getSTI()); 5810 OperandMatchResultTy Res; 5811 SMLoc Loc = getLoc(); 5812 5813 // Parse legacy format syntax. 5814 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5815 if (Res == MatchOperand_ParseFail) 5816 return Res; 5817 5818 bool FormatFound = (Res == MatchOperand_Success); 5819 5820 Operands.push_back( 5821 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5822 5823 if (FormatFound) 5824 trySkipToken(AsmToken::Comma); 5825 5826 if (isToken(AsmToken::EndOfStatement)) { 5827 // We are expecting an soffset operand, 5828 // but let matcher handle the error. 5829 return MatchOperand_Success; 5830 } 5831 5832 // Parse soffset. 
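// Note that a symbolic or numeric "format:..." modifier may also legally
// follow soffset; if no legacy dfmt/nfmt was found above, it is handled after
// soffset below.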
5833 Res = parseRegOrImm(Operands); 5834 if (Res != MatchOperand_Success) 5835 return Res; 5836 5837 trySkipToken(AsmToken::Comma); 5838 5839 if (!FormatFound) { 5840 Res = parseSymbolicOrNumericFormat(Format); 5841 if (Res == MatchOperand_ParseFail) 5842 return Res; 5843 if (Res == MatchOperand_Success) { 5844 auto Size = Operands.size(); 5845 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5846 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5847 Op.setImm(Format); 5848 } 5849 return MatchOperand_Success; 5850 } 5851 5852 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5853 Error(getLoc(), "duplicate format"); 5854 return MatchOperand_ParseFail; 5855 } 5856 return MatchOperand_Success; 5857 } 5858 5859 //===----------------------------------------------------------------------===// 5860 // ds 5861 //===----------------------------------------------------------------------===// 5862 5863 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5864 const OperandVector &Operands) { 5865 OptionalImmIndexMap OptionalIdx; 5866 5867 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5868 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5869 5870 // Add the register arguments 5871 if (Op.isReg()) { 5872 Op.addRegOperands(Inst, 1); 5873 continue; 5874 } 5875 5876 // Handle optional arguments 5877 OptionalIdx[Op.getImmTy()] = i; 5878 } 5879 5880 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5882 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5883 5884 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5885 } 5886 5887 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5888 bool IsGdsHardcoded) { 5889 OptionalImmIndexMap OptionalIdx; 5890 5891 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5892 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5893 5894 // Add the register arguments 5895 if (Op.isReg()) { 5896 Op.addRegOperands(Inst, 1); 5897 continue; 5898 } 5899 5900 if (Op.isToken() && Op.getToken() == "gds") { 5901 IsGdsHardcoded = true; 5902 continue; 5903 } 5904 5905 // Handle optional arguments 5906 OptionalIdx[Op.getImmTy()] = i; 5907 } 5908 5909 AMDGPUOperand::ImmTy OffsetType = 5910 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5911 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5912 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5913 AMDGPUOperand::ImmTyOffset; 5914 5915 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5916 5917 if (!IsGdsHardcoded) { 5918 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5919 } 5920 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5921 } 5922 5923 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5924 OptionalImmIndexMap OptionalIdx; 5925 5926 unsigned OperandIdx[4]; 5927 unsigned EnMask = 0; 5928 int SrcIdx = 0; 5929 5930 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5931 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5932 5933 // Add the register arguments 5934 if (Op.isReg()) { 5935 assert(SrcIdx < 4); 5936 OperandIdx[SrcIdx] = Inst.size(); 5937 Op.addRegOperands(Inst, 1); 5938 ++SrcIdx; 5939 continue; 5940 } 5941 5942 if (Op.isOff()) { 5943 assert(SrcIdx < 4); 5944 OperandIdx[SrcIdx] = Inst.size(); 5945 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5946 ++SrcIdx; 5947 continue; 5948 } 5949 5950 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5951 Op.addImmOperands(Inst, 1); 5952 continue; 5953 } 5954 5955 if (Op.isToken() && Op.getToken() == "done") 5956 continue; 5957 5958 // Handle optional arguments 5959 OptionalIdx[Op.getImmTy()] = i; 5960 } 5961 5962 assert(SrcIdx == 4); 5963 5964 bool Compr = false; 5965 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5966 Compr = true; 5967 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5968 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5969 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5970 } 5971 5972 for (auto i = 0; i < SrcIdx; ++i) { 5973 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5974 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5975 } 5976 } 5977 5978 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5980 5981 Inst.addOperand(MCOperand::createImm(EnMask)); 5982 } 5983 5984 //===----------------------------------------------------------------------===// 5985 // s_waitcnt 5986 //===----------------------------------------------------------------------===// 5987 5988 static bool 5989 encodeCnt( 5990 const AMDGPU::IsaVersion ISA, 5991 int64_t &IntVal, 5992 int64_t CntVal, 5993 bool Saturate, 5994 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5995 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5996 { 5997 bool Failed = false; 5998 5999 IntVal = encode(ISA, IntVal, CntVal); 6000 if (CntVal != decode(ISA, IntVal)) { 6001 if (Saturate) { 6002 IntVal = encode(ISA, IntVal, -1); 6003 } else { 6004 Failed = true; 6005 } 6006 } 6007 return Failed; 6008 } 6009 6010 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6011 6012 SMLoc CntLoc = getLoc(); 6013 StringRef CntName = getTokenStr(); 6014 6015 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6016 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6017 return false; 6018 6019 int64_t CntVal; 6020 SMLoc ValLoc = getLoc(); 6021 if (!parseExpr(CntVal)) 6022 return false; 6023 6024 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6025 6026 bool Failed = true; 6027 bool Sat = CntName.endswith("_sat"); 6028 6029 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6030 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6031 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6032 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6033 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6034 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6035 } else { 6036 Error(CntLoc, "invalid counter name " + CntName); 6037 return false; 6038 } 6039 6040 if (Failed) { 6041 Error(ValLoc, "too large value for " + CntName); 6042 return false; 6043 } 6044 6045 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6046 return false; 6047 6048 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6049 if (isToken(AsmToken::EndOfStatement)) { 6050 Error(getLoc(), "expected a counter name"); 6051 return false; 6052 } 6053 } 6054 6055 return true; 6056 } 6057 6058 OperandMatchResultTy 6059 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6060 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6061 int64_t Waitcnt = getWaitcntBitMask(ISA); 6062 SMLoc S = getLoc(); 6063 6064 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6065 while (!isToken(AsmToken::EndOfStatement)) { 6066 if (!parseCnt(Waitcnt)) 6067 return MatchOperand_ParseFail; 6068 } 6069 } else { 6070 if (!parseExpr(Waitcnt)) 6071 return MatchOperand_ParseFail; 6072 } 6073 6074 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6075 return MatchOperand_Success; 6076 } 6077 6078 bool 6079 AMDGPUOperand::isSWaitCnt() const { 6080 return isImm(); 6081 } 6082 6083 //===----------------------------------------------------------------------===// 6084 // hwreg 6085 //===----------------------------------------------------------------------===// 6086 6087 bool 6088 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6089 OperandInfoTy &Offset, 6090 
OperandInfoTy &Width) { 6091 using namespace llvm::AMDGPU::Hwreg; 6092 6093 // The register may be specified by name or using a numeric code 6094 HwReg.Loc = getLoc(); 6095 if (isToken(AsmToken::Identifier) && 6096 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6097 HwReg.IsSymbolic = true; 6098 lex(); // skip register name 6099 } else if (!parseExpr(HwReg.Id, "a register name")) { 6100 return false; 6101 } 6102 6103 if (trySkipToken(AsmToken::RParen)) 6104 return true; 6105 6106 // parse optional params 6107 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6108 return false; 6109 6110 Offset.Loc = getLoc(); 6111 if (!parseExpr(Offset.Id)) 6112 return false; 6113 6114 if (!skipToken(AsmToken::Comma, "expected a comma")) 6115 return false; 6116 6117 Width.Loc = getLoc(); 6118 return parseExpr(Width.Id) && 6119 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6120 } 6121 6122 bool 6123 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6124 const OperandInfoTy &Offset, 6125 const OperandInfoTy &Width) { 6126 6127 using namespace llvm::AMDGPU::Hwreg; 6128 6129 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6130 Error(HwReg.Loc, 6131 "specified hardware register is not supported on this GPU"); 6132 return false; 6133 } 6134 if (!isValidHwreg(HwReg.Id)) { 6135 Error(HwReg.Loc, 6136 "invalid code of hardware register: only 6-bit values are legal"); 6137 return false; 6138 } 6139 if (!isValidHwregOffset(Offset.Id)) { 6140 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6141 return false; 6142 } 6143 if (!isValidHwregWidth(Width.Id)) { 6144 Error(Width.Loc, 6145 "invalid bitfield width: only values from 1 to 32 are legal"); 6146 return false; 6147 } 6148 return true; 6149 } 6150 6151 OperandMatchResultTy 6152 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6153 using namespace llvm::AMDGPU::Hwreg; 6154 6155 int64_t ImmVal = 0; 6156 SMLoc Loc = getLoc(); 6157 6158 if (trySkipId("hwreg", AsmToken::LParen)) { 6159 OperandInfoTy HwReg(ID_UNKNOWN_); 6160 OperandInfoTy Offset(OFFSET_DEFAULT_); 6161 OperandInfoTy Width(WIDTH_DEFAULT_); 6162 if (parseHwregBody(HwReg, Offset, Width) && 6163 validateHwreg(HwReg, Offset, Width)) { 6164 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6165 } else { 6166 return MatchOperand_ParseFail; 6167 } 6168 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6169 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6170 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6171 return MatchOperand_ParseFail; 6172 } 6173 } else { 6174 return MatchOperand_ParseFail; 6175 } 6176 6177 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6178 return MatchOperand_Success; 6179 } 6180 6181 bool AMDGPUOperand::isHwreg() const { 6182 return isImmTy(ImmTyHwreg); 6183 } 6184 6185 //===----------------------------------------------------------------------===// 6186 // sendmsg 6187 //===----------------------------------------------------------------------===// 6188 6189 bool 6190 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6191 OperandInfoTy &Op, 6192 OperandInfoTy &Stream) { 6193 using namespace llvm::AMDGPU::SendMsg; 6194 6195 Msg.Loc = getLoc(); 6196 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6197 Msg.IsSymbolic = true; 6198 lex(); // skip message name 6199 } else if (!parseExpr(Msg.Id, "a message name")) { 6200 return false; 6201 } 6202 6203 if (trySkipToken(AsmToken::Comma)) { 6204 Op.IsDefined = true; 
6205 Op.Loc = getLoc(); 6206 if (isToken(AsmToken::Identifier) && 6207 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6208 lex(); // skip operation name 6209 } else if (!parseExpr(Op.Id, "an operation name")) { 6210 return false; 6211 } 6212 6213 if (trySkipToken(AsmToken::Comma)) { 6214 Stream.IsDefined = true; 6215 Stream.Loc = getLoc(); 6216 if (!parseExpr(Stream.Id)) 6217 return false; 6218 } 6219 } 6220 6221 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6222 } 6223 6224 bool 6225 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6226 const OperandInfoTy &Op, 6227 const OperandInfoTy &Stream) { 6228 using namespace llvm::AMDGPU::SendMsg; 6229 6230 // Validation strictness depends on whether the message is specified 6231 // in a symbolic or in a numeric form. In the latter case 6232 // only the encoding possibility is checked. 6233 bool Strict = Msg.IsSymbolic; 6234 6235 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6236 Error(Msg.Loc, "invalid message id"); 6237 return false; 6238 } 6239 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6240 if (Op.IsDefined) { 6241 Error(Op.Loc, "message does not support operations"); 6242 } else { 6243 Error(Msg.Loc, "missing message operation"); 6244 } 6245 return false; 6246 } 6247 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6248 Error(Op.Loc, "invalid operation id"); 6249 return false; 6250 } 6251 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6252 Error(Stream.Loc, "message operation does not support streams"); 6253 return false; 6254 } 6255 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6256 Error(Stream.Loc, "invalid message stream id"); 6257 return false; 6258 } 6259 return true; 6260 } 6261 6262 OperandMatchResultTy 6263 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6264 using namespace llvm::AMDGPU::SendMsg; 6265 6266 int64_t ImmVal = 0; 6267 SMLoc Loc = getLoc(); 6268 6269 if (trySkipId("sendmsg", AsmToken::LParen)) { 6270 OperandInfoTy Msg(ID_UNKNOWN_); 6271 OperandInfoTy Op(OP_NONE_); 6272 OperandInfoTy Stream(STREAM_ID_NONE_); 6273 if (parseSendMsgBody(Msg, Op, Stream) && 6274 validateSendMsg(Msg, Op, Stream)) { 6275 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6276 } else { 6277 return MatchOperand_ParseFail; 6278 } 6279 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6280 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6281 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6282 return MatchOperand_ParseFail; 6283 } 6284 } else { 6285 return MatchOperand_ParseFail; 6286 } 6287 6288 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6289 return MatchOperand_Success; 6290 } 6291 6292 bool AMDGPUOperand::isSendMsg() const { 6293 return isImmTy(ImmTySendMsg); 6294 } 6295 6296 //===----------------------------------------------------------------------===// 6297 // v_interp 6298 //===----------------------------------------------------------------------===// 6299 6300 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6301 StringRef Str; 6302 SMLoc S = getLoc(); 6303 6304 if (!parseId(Str)) 6305 return MatchOperand_NoMatch; 6306 6307 int Slot = StringSwitch<int>(Str) 6308 .Case("p10", 0) 6309 .Case("p20", 1) 6310 .Case("p0", 2) 6311 .Default(-1); 6312 6313 if (Slot == -1) { 6314 Error(S, "invalid interpolation slot"); 6315 return MatchOperand_ParseFail; 6316 } 6317 6318 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6319
AMDGPUOperand::ImmTyInterpSlot)); 6320 return MatchOperand_Success; 6321 } 6322 6323 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6324 StringRef Str; 6325 SMLoc S = getLoc(); 6326 6327 if (!parseId(Str)) 6328 return MatchOperand_NoMatch; 6329 6330 if (!Str.startswith("attr")) { 6331 Error(S, "invalid interpolation attribute"); 6332 return MatchOperand_ParseFail; 6333 } 6334 6335 StringRef Chan = Str.take_back(2); 6336 int AttrChan = StringSwitch<int>(Chan) 6337 .Case(".x", 0) 6338 .Case(".y", 1) 6339 .Case(".z", 2) 6340 .Case(".w", 3) 6341 .Default(-1); 6342 if (AttrChan == -1) { 6343 Error(S, "invalid or missing interpolation attribute channel"); 6344 return MatchOperand_ParseFail; 6345 } 6346 6347 Str = Str.drop_back(2).drop_front(4); 6348 6349 uint8_t Attr; 6350 if (Str.getAsInteger(10, Attr)) { 6351 Error(S, "invalid or missing interpolation attribute number"); 6352 return MatchOperand_ParseFail; 6353 } 6354 6355 if (Attr > 63) { 6356 Error(S, "out of bounds interpolation attribute number"); 6357 return MatchOperand_ParseFail; 6358 } 6359 6360 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6361 6362 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6363 AMDGPUOperand::ImmTyInterpAttr)); 6364 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6365 AMDGPUOperand::ImmTyAttrChan)); 6366 return MatchOperand_Success; 6367 } 6368 6369 //===----------------------------------------------------------------------===// 6370 // exp 6371 //===----------------------------------------------------------------------===// 6372 6373 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6374 using namespace llvm::AMDGPU::Exp; 6375 6376 StringRef Str; 6377 SMLoc S = getLoc(); 6378 6379 if (!parseId(Str)) 6380 return MatchOperand_NoMatch; 6381 6382 unsigned Id = getTgtId(Str); 6383 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6384 Error(S, (Id == ET_INVALID) ? 
6385 "invalid exp target" : 6386 "exp target is not supported on this GPU"); 6387 return MatchOperand_ParseFail; 6388 } 6389 6390 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6391 AMDGPUOperand::ImmTyExpTgt)); 6392 return MatchOperand_Success; 6393 } 6394 6395 //===----------------------------------------------------------------------===// 6396 // parser helpers 6397 //===----------------------------------------------------------------------===// 6398 6399 bool 6400 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6401 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6402 } 6403 6404 bool 6405 AMDGPUAsmParser::isId(const StringRef Id) const { 6406 return isId(getToken(), Id); 6407 } 6408 6409 bool 6410 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6411 return getTokenKind() == Kind; 6412 } 6413 6414 bool 6415 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6416 if (isId(Id)) { 6417 lex(); 6418 return true; 6419 } 6420 return false; 6421 } 6422 6423 bool 6424 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6425 if (isToken(AsmToken::Identifier)) { 6426 StringRef Tok = getTokenStr(); 6427 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6428 lex(); 6429 return true; 6430 } 6431 } 6432 return false; 6433 } 6434 6435 bool 6436 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6437 if (isId(Id) && peekToken().is(Kind)) { 6438 lex(); 6439 lex(); 6440 return true; 6441 } 6442 return false; 6443 } 6444 6445 bool 6446 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6447 if (isToken(Kind)) { 6448 lex(); 6449 return true; 6450 } 6451 return false; 6452 } 6453 6454 bool 6455 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6456 const StringRef ErrMsg) { 6457 if (!trySkipToken(Kind)) { 6458 Error(getLoc(), ErrMsg); 6459 return false; 6460 } 6461 return true; 6462 } 6463 6464 bool 6465 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6466 SMLoc S = getLoc(); 6467 6468 const MCExpr *Expr; 6469 if (Parser.parseExpression(Expr)) 6470 return false; 6471 6472 if (Expr->evaluateAsAbsolute(Imm)) 6473 return true; 6474 6475 if (Expected.empty()) { 6476 Error(S, "expected absolute expression"); 6477 } else { 6478 Error(S, Twine("expected ", Expected) + 6479 Twine(" or an absolute expression")); 6480 } 6481 return false; 6482 } 6483 6484 bool 6485 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6486 SMLoc S = getLoc(); 6487 6488 const MCExpr *Expr; 6489 if (Parser.parseExpression(Expr)) 6490 return false; 6491 6492 int64_t IntVal; 6493 if (Expr->evaluateAsAbsolute(IntVal)) { 6494 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6495 } else { 6496 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6497 } 6498 return true; 6499 } 6500 6501 bool 6502 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6503 if (isToken(AsmToken::String)) { 6504 Val = getToken().getStringContents(); 6505 lex(); 6506 return true; 6507 } else { 6508 Error(getLoc(), ErrMsg); 6509 return false; 6510 } 6511 } 6512 6513 bool 6514 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6515 if (isToken(AsmToken::Identifier)) { 6516 Val = getTokenStr(); 6517 lex(); 6518 return true; 6519 } else { 6520 if (!ErrMsg.empty()) 6521 Error(getLoc(), ErrMsg); 6522 return false; 6523 } 6524 } 6525 6526 AsmToken 6527 AMDGPUAsmParser::getToken() const { 6528 return Parser.getTok(); 6529 } 6530 6531 AsmToken 6532 
AMDGPUAsmParser::peekToken() { 6533 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6534 } 6535 6536 void 6537 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6538 auto TokCount = getLexer().peekTokens(Tokens); 6539 6540 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6541 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6542 } 6543 6544 AsmToken::TokenKind 6545 AMDGPUAsmParser::getTokenKind() const { 6546 return getLexer().getKind(); 6547 } 6548 6549 SMLoc 6550 AMDGPUAsmParser::getLoc() const { 6551 return getToken().getLoc(); 6552 } 6553 6554 StringRef 6555 AMDGPUAsmParser::getTokenStr() const { 6556 return getToken().getString(); 6557 } 6558 6559 void 6560 AMDGPUAsmParser::lex() { 6561 Parser.Lex(); 6562 } 6563 6564 SMLoc 6565 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6566 const OperandVector &Operands) const { 6567 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6568 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6569 if (Test(Op)) 6570 return Op.getStartLoc(); 6571 } 6572 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6573 } 6574 6575 SMLoc 6576 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6577 const OperandVector &Operands) const { 6578 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6579 return getOperandLoc(Test, Operands); 6580 } 6581 6582 SMLoc 6583 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6584 const OperandVector &Operands) const { 6585 auto Test = [=](const AMDGPUOperand& Op) { 6586 return Op.isRegKind() && Op.getReg() == Reg; 6587 }; 6588 return getOperandLoc(Test, Operands); 6589 } 6590 6591 SMLoc 6592 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6593 auto Test = [](const AMDGPUOperand& Op) { 6594 return Op.IsImmKindLiteral() || Op.isExpr(); 6595 }; 6596 return getOperandLoc(Test, Operands); 6597 } 6598 6599 SMLoc 6600 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6601 auto Test = [](const AMDGPUOperand& Op) { 6602 return Op.isImmKindConst(); 6603 }; 6604 return getOperandLoc(Test, Operands); 6605 } 6606 6607 //===----------------------------------------------------------------------===// 6608 // swizzle 6609 //===----------------------------------------------------------------------===// 6610 6611 LLVM_READNONE 6612 static unsigned 6613 encodeBitmaskPerm(const unsigned AndMask, 6614 const unsigned OrMask, 6615 const unsigned XorMask) { 6616 using namespace llvm::AMDGPU::Swizzle; 6617 6618 return BITMASK_PERM_ENC | 6619 (AndMask << BITMASK_AND_SHIFT) | 6620 (OrMask << BITMASK_OR_SHIFT) | 6621 (XorMask << BITMASK_XOR_SHIFT); 6622 } 6623 6624 bool 6625 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6626 const unsigned MinVal, 6627 const unsigned MaxVal, 6628 const StringRef ErrMsg, 6629 SMLoc &Loc) { 6630 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6631 return false; 6632 } 6633 Loc = getLoc(); 6634 if (!parseExpr(Op)) { 6635 return false; 6636 } 6637 if (Op < MinVal || Op > MaxVal) { 6638 Error(Loc, ErrMsg); 6639 return false; 6640 } 6641 6642 return true; 6643 } 6644 6645 bool 6646 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6647 const unsigned MinVal, 6648 const unsigned MaxVal, 6649 const StringRef ErrMsg) { 6650 SMLoc Loc; 6651 for (unsigned i = 0; i < OpNum; ++i) { 6652 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6653 return false; 6654 } 6655 6656 return true; 6657 } 6658 6659 bool 6660 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6661 using namespace llvm::AMDGPU::Swizzle; 6662 6663 int64_t Lane[LANE_NUM]; 6664 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6665 "expected a 2-bit lane id")) { 6666 Imm = QUAD_PERM_ENC; 6667 for (unsigned I = 0; I < LANE_NUM; ++I) { 6668 Imm |= Lane[I] << (LANE_SHIFT * I); 6669 } 6670 return true; 6671 } 6672 return false; 6673 } 6674 6675 bool 6676 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6677 using namespace llvm::AMDGPU::Swizzle; 6678 6679 SMLoc Loc; 6680 int64_t GroupSize; 6681 int64_t LaneIdx; 6682 6683 if (!parseSwizzleOperand(GroupSize, 6684 2, 32, 6685 "group size must be in the interval [2,32]", 6686 Loc)) { 6687 return false; 6688 } 6689 if (!isPowerOf2_64(GroupSize)) { 6690 Error(Loc, "group size must be a power of two"); 6691 return false; 6692 } 6693 if (parseSwizzleOperand(LaneIdx, 6694 0, GroupSize - 1, 6695 "lane id must be in the interval [0,group size - 1]", 6696 Loc)) { 6697 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6698 return true; 6699 } 6700 return false; 6701 } 6702 6703 bool 6704 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6705 using namespace llvm::AMDGPU::Swizzle; 6706 6707 SMLoc Loc; 6708 int64_t GroupSize; 6709 6710 if (!parseSwizzleOperand(GroupSize, 6711 2, 32, 6712 "group size must be in the interval [2,32]", 6713 Loc)) { 6714 return false; 6715 } 6716 if (!isPowerOf2_64(GroupSize)) { 6717 Error(Loc, "group size must be a power of two"); 6718 return false; 6719 } 6720 6721 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6722 return true; 6723 } 6724 6725 bool 6726 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6727 using namespace llvm::AMDGPU::Swizzle; 6728 6729 SMLoc Loc; 6730 int64_t GroupSize; 6731 6732 if (!parseSwizzleOperand(GroupSize, 6733 1, 16, 6734 "group size must be in the interval [1,16]", 6735 Loc)) { 6736 return false; 6737 } 6738 if (!isPowerOf2_64(GroupSize)) { 6739 Error(Loc, "group size must be a power of two"); 6740 return false; 6741 } 6742 6743 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6744 return true; 6745 } 6746 6747 bool 6748 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6749 using namespace llvm::AMDGPU::Swizzle; 6750 6751 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6752 return false; 6753 } 6754 6755 StringRef Ctl; 6756 SMLoc StrLoc = getLoc(); 6757 if (!parseString(Ctl)) { 6758 return false; 6759 } 6760 if (Ctl.size() != BITMASK_WIDTH) { 6761 Error(StrLoc, "expected a 5-character mask"); 6762 return false; 6763 } 6764 6765 unsigned AndMask = 0; 6766 unsigned OrMask = 0; 6767 unsigned XorMask = 0; 6768 6769 for (size_t i = 0; i < Ctl.size(); ++i) { 6770 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6771 switch(Ctl[i]) { 6772 default: 6773 Error(StrLoc, "invalid mask"); 6774 return false; 6775 case '0': 6776 break; 6777 case '1': 6778 OrMask |= Mask; 6779 break; 6780 case 'p': 6781 AndMask |= Mask; 6782 break; 6783 case 'i': 6784 AndMask |= Mask; 6785 XorMask |= Mask; 6786 break; 6787 } 6788 } 6789 6790 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6791 return true; 6792 } 6793 6794 bool 6795 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6796 6797 SMLoc OffsetLoc = getLoc(); 6798 6799 if (!parseExpr(Imm, "a swizzle macro")) { 6800 return false; 6801 } 6802 if (!isUInt<16>(Imm)) { 6803 Error(OffsetLoc, "expected a 16-bit offset"); 6804 return false; 6805 } 6806 return true; 6807 } 6808 6809 bool 6810 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6811 using namespace llvm::AMDGPU::Swizzle; 6812 6813 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6814 6815 SMLoc ModeLoc = getLoc(); 6816 bool Ok = false; 6817 6818 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6819 Ok = parseSwizzleQuadPerm(Imm); 6820 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6821 Ok = parseSwizzleBitmaskPerm(Imm); 6822 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6823 Ok = parseSwizzleBroadcast(Imm); 6824 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6825 Ok = parseSwizzleSwap(Imm); 6826 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6827 Ok = parseSwizzleReverse(Imm); 6828 } else { 6829 Error(ModeLoc, "expected a swizzle mode"); 6830 } 6831 6832 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6833 } 6834 6835 return false; 6836 } 6837 6838 OperandMatchResultTy 6839 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6840 SMLoc S = getLoc(); 6841 int64_t Imm = 0; 6842 6843 if (trySkipId("offset")) { 6844 6845 bool Ok = false; 6846 if (skipToken(AsmToken::Colon, "expected a colon")) { 6847 if (trySkipId("swizzle")) { 6848 Ok = parseSwizzleMacro(Imm); 6849 } else { 6850 Ok = parseSwizzleOffset(Imm); 6851 } 6852 } 6853 6854 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6855 6856 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6857 } else { 6858 // Swizzle "offset" operand is optional. 6859 // If it is omitted, try parsing other optional operands. 6860 return parseOptionalOpr(Operands); 6861 } 6862 } 6863 6864 bool 6865 AMDGPUOperand::isSwizzle() const { 6866 return isImmTy(ImmTySwizzle); 6867 } 6868 6869 //===----------------------------------------------------------------------===// 6870 // VGPR Index Mode 6871 //===----------------------------------------------------------------------===// 6872 6873 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6874 6875 using namespace llvm::AMDGPU::VGPRIndexMode; 6876 6877 if (trySkipToken(AsmToken::RParen)) { 6878 return OFF; 6879 } 6880 6881 int64_t Imm = 0; 6882 6883 while (true) { 6884 unsigned Mode = 0; 6885 SMLoc S = getLoc(); 6886 6887 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6888 if (trySkipId(IdSymbolic[ModeId])) { 6889 Mode = 1 << ModeId; 6890 break; 6891 } 6892 } 6893 6894 if (Mode == 0) { 6895 Error(S, (Imm == 0)? 
6896 "expected a VGPR index mode or a closing parenthesis" : 6897 "expected a VGPR index mode"); 6898 return UNDEF; 6899 } 6900 6901 if (Imm & Mode) { 6902 Error(S, "duplicate VGPR index mode"); 6903 return UNDEF; 6904 } 6905 Imm |= Mode; 6906 6907 if (trySkipToken(AsmToken::RParen)) 6908 break; 6909 if (!skipToken(AsmToken::Comma, 6910 "expected a comma or a closing parenthesis")) 6911 return UNDEF; 6912 } 6913 6914 return Imm; 6915 } 6916 6917 OperandMatchResultTy 6918 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6919 6920 using namespace llvm::AMDGPU::VGPRIndexMode; 6921 6922 int64_t Imm = 0; 6923 SMLoc S = getLoc(); 6924 6925 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6926 Imm = parseGPRIdxMacro(); 6927 if (Imm == UNDEF) 6928 return MatchOperand_ParseFail; 6929 } else { 6930 if (getParser().parseAbsoluteExpression(Imm)) 6931 return MatchOperand_ParseFail; 6932 if (Imm < 0 || !isUInt<4>(Imm)) { 6933 Error(S, "invalid immediate: only 4-bit values are legal"); 6934 return MatchOperand_ParseFail; 6935 } 6936 } 6937 6938 Operands.push_back( 6939 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6940 return MatchOperand_Success; 6941 } 6942 6943 bool AMDGPUOperand::isGPRIdxMode() const { 6944 return isImmTy(ImmTyGprIdxMode); 6945 } 6946 6947 //===----------------------------------------------------------------------===// 6948 // sopp branch targets 6949 //===----------------------------------------------------------------------===// 6950 6951 OperandMatchResultTy 6952 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6953 6954 // Make sure we are not parsing something 6955 // that looks like a label or an expression but is not. 6956 // This will improve error messages. 6957 if (isRegister() || isModifier()) 6958 return MatchOperand_NoMatch; 6959 6960 if (!parseExpr(Operands)) 6961 return MatchOperand_ParseFail; 6962 6963 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6964 assert(Opr.isImm() || Opr.isExpr()); 6965 SMLoc Loc = Opr.getStartLoc(); 6966 6967 // Currently we do not support arbitrary expressions as branch targets. 6968 // Only labels and absolute expressions are accepted. 
6969 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6970 Error(Loc, "expected an absolute expression or a label"); 6971 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6972 Error(Loc, "expected a 16-bit signed jump offset"); 6973 } 6974 6975 return MatchOperand_Success; 6976 } 6977 6978 //===----------------------------------------------------------------------===// 6979 // Boolean holding registers 6980 //===----------------------------------------------------------------------===// 6981 6982 OperandMatchResultTy 6983 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6984 return parseReg(Operands); 6985 } 6986 6987 //===----------------------------------------------------------------------===// 6988 // mubuf 6989 //===----------------------------------------------------------------------===// 6990 6991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6992 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6993 } 6994 6995 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6996 const OperandVector &Operands, 6997 bool IsAtomic, 6998 bool IsLds) { 6999 bool IsLdsOpcode = IsLds; 7000 bool HasLdsModifier = false; 7001 OptionalImmIndexMap OptionalIdx; 7002 unsigned FirstOperandIdx = 1; 7003 bool IsAtomicReturn = false; 7004 7005 if (IsAtomic) { 7006 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7007 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7008 if (!Op.isCPol()) 7009 continue; 7010 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7011 break; 7012 } 7013 7014 if (!IsAtomicReturn) { 7015 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7016 if (NewOpc != -1) 7017 Inst.setOpcode(NewOpc); 7018 } 7019 7020 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7021 SIInstrFlags::IsAtomicRet; 7022 } 7023 7024 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7025 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7026 7027 // Add the register arguments 7028 if (Op.isReg()) { 7029 Op.addRegOperands(Inst, 1); 7030 // Insert a tied src for atomic return dst. 7031 // This cannot be postponed as subsequent calls to 7032 // addImmOperands rely on correct number of MC operands. 7033 if (IsAtomicReturn && i == FirstOperandIdx) 7034 Op.addRegOperands(Inst, 1); 7035 continue; 7036 } 7037 7038 // Handle the case where soffset is an immediate 7039 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7040 Op.addImmOperands(Inst, 1); 7041 continue; 7042 } 7043 7044 HasLdsModifier |= Op.isLDS(); 7045 7046 // Handle tokens like 'offen' which are sometimes hard-coded into the 7047 // asm string. There are no MCInst operands for these. 7048 if (Op.isToken()) { 7049 continue; 7050 } 7051 assert(Op.isImm()); 7052 7053 // Handle optional arguments 7054 OptionalIdx[Op.getImmTy()] = i; 7055 } 7056 7057 // This is a workaround for an llvm quirk which may result in an 7058 // incorrect instruction selection. Lds and non-lds versions of 7059 // MUBUF instructions are identical except that lds versions 7060 // have mandatory 'lds' modifier. However this modifier follows 7061 // optional modifiers and llvm asm matcher regards this 'lds' 7062 // modifier as an optional one. As a result, an lds version 7063 // of opcode may be selected even if it has no 'lds' modifier. 7064 if (IsLdsOpcode && !HasLdsModifier) { 7065 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7066 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7067 Inst.setOpcode(NoLdsOpcode); 7068 IsLdsOpcode = false; 7069 } 7070 } 7071 7072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7073 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7074 7075 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7077 } 7078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7079 } 7080 7081 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7082 OptionalImmIndexMap OptionalIdx; 7083 7084 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7085 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7086 7087 // Add the register arguments 7088 if (Op.isReg()) { 7089 Op.addRegOperands(Inst, 1); 7090 continue; 7091 } 7092 7093 // Handle the case where soffset is an immediate 7094 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7095 Op.addImmOperands(Inst, 1); 7096 continue; 7097 } 7098 7099 // Handle tokens like 'offen' which are sometimes hard-coded into the 7100 // asm string. There are no MCInst operands for these. 7101 if (Op.isToken()) { 7102 continue; 7103 } 7104 assert(Op.isImm()); 7105 7106 // Handle optional arguments 7107 OptionalIdx[Op.getImmTy()] = i; 7108 } 7109 7110 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7111 AMDGPUOperand::ImmTyOffset); 7112 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7113 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7116 } 7117 7118 //===----------------------------------------------------------------------===// 7119 // mimg 7120 //===----------------------------------------------------------------------===// 7121 7122 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7123 bool IsAtomic) { 7124 unsigned I = 1; 7125 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7126 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7127 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7128 } 7129 7130 if (IsAtomic) { 7131 // Add src, same as dst 7132 assert(Desc.getNumDefs() == 1); 7133 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7134 } 7135 7136 OptionalImmIndexMap OptionalIdx; 7137 7138 for (unsigned E = Operands.size(); I != E; ++I) { 7139 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7140 7141 // Add the register arguments 7142 if (Op.isReg()) { 7143 Op.addRegOperands(Inst, 1); 7144 } else if (Op.isImmModifier()) { 7145 OptionalIdx[Op.getImmTy()] = I; 7146 } else if (!Op.isToken()) { 7147 llvm_unreachable("unexpected operand type"); 7148 } 7149 } 7150 7151 bool IsGFX10Plus = isGFX10Plus(); 7152 7153 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7154 if (IsGFX10Plus) 7155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7158 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7159 if (IsGFX10Plus) 7160 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7161 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7162 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7163 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7164 if (!IsGFX10Plus) 7165 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7166 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7167 } 7168 7169 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7170 cvtMIMG(Inst, Operands, true); 7171 } 7172 7173 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7174 OptionalImmIndexMap OptionalIdx; 7175 bool IsAtomicReturn = false; 7176 7177 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7178 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7179 if (!Op.isCPol()) 7180 continue; 7181 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7182 break; 7183 } 7184 7185 if (!IsAtomicReturn) { 7186 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7187 if (NewOpc != -1) 7188 Inst.setOpcode(NewOpc); 7189 } 7190 7191 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7192 SIInstrFlags::IsAtomicRet; 7193 7194 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7195 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7196 7197 // Add the register arguments 7198 if (Op.isReg()) { 7199 Op.addRegOperands(Inst, 1); 7200 if (IsAtomicReturn && i == 1) 7201 Op.addRegOperands(Inst, 1); 7202 continue; 7203 } 7204 7205 // Handle the case where soffset is an immediate 7206 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7207 Op.addImmOperands(Inst, 1); 7208 continue; 7209 } 7210 7211 // Handle tokens like 'offen' which are sometimes hard-coded into the 7212 // asm string. There are no MCInst operands for these. 7213 if (Op.isToken()) { 7214 continue; 7215 } 7216 assert(Op.isImm()); 7217 7218 // Handle optional arguments 7219 OptionalIdx[Op.getImmTy()] = i; 7220 } 7221 7222 if ((int)Inst.getNumOperands() <= 7223 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7224 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7225 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7226 } 7227 7228 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7229 const OperandVector &Operands) { 7230 for (unsigned I = 1; I < Operands.size(); ++I) { 7231 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7232 if (Operand.isReg()) 7233 Operand.addRegOperands(Inst, 1); 7234 } 7235 7236 Inst.addOperand(MCOperand::createImm(1)); // a16 7237 } 7238 7239 //===----------------------------------------------------------------------===// 7240 // smrd 7241 //===----------------------------------------------------------------------===// 7242 7243 bool AMDGPUOperand::isSMRDOffset8() const { 7244 return isImm() && isUInt<8>(getImm()); 7245 } 7246 7247 bool AMDGPUOperand::isSMEMOffset() const { 7248 return isImm(); // Offset range is checked later by validator. 7249 } 7250 7251 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7252 // 32-bit literals are only supported on CI and we only want to use them 7253 // when the offset is > 8-bits. 
7254 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7255 } 7256 7257 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7258 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7259 } 7260 7261 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7262 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7263 } 7264 7265 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7266 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7267 } 7268 7269 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7270 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7271 } 7272 7273 //===----------------------------------------------------------------------===// 7274 // vop3 7275 //===----------------------------------------------------------------------===// 7276 7277 static bool ConvertOmodMul(int64_t &Mul) { 7278 if (Mul != 1 && Mul != 2 && Mul != 4) 7279 return false; 7280 7281 Mul >>= 1; 7282 return true; 7283 } 7284 7285 static bool ConvertOmodDiv(int64_t &Div) { 7286 if (Div == 1) { 7287 Div = 0; 7288 return true; 7289 } 7290 7291 if (Div == 2) { 7292 Div = 3; 7293 return true; 7294 } 7295 7296 return false; 7297 } 7298 7299 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7300 // This is intentional and ensures compatibility with sp3. 7301 // See bug 35397 for details. 7302 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7303 if (BoundCtrl == 0 || BoundCtrl == 1) { 7304 BoundCtrl = 1; 7305 return true; 7306 } 7307 return false; 7308 } 7309 7310 // Note: the order in this table matches the order of operands in AsmString. 7311 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7312 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7313 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7314 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7315 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7316 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7317 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7318 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7319 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7320 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7321 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7322 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7323 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7324 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7325 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7326 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7327 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7328 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7329 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7330 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7331 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7332 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7333 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7334 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7335 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7336 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7337 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7338 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7339 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7340 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7341 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7342 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7343 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7344 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7345 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7346 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7347 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7348 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7349 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7350 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7351 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7352 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7353 }; 7354 7355 void AMDGPUAsmParser::onBeginOfFile() { 7356 if (!getParser().getStreamer().getTargetStreamer() || 7357 getSTI().getTargetTriple().getArch() == Triple::r600) 7358 return; 7359 7360 if (!getTargetStreamer().getTargetID()) 7361 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7362 7363 if (isHsaAbiVersion3Or4(&getSTI())) 7364 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7365 } 7366 7367 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7368 7369 OperandMatchResultTy res = parseOptionalOpr(Operands); 7370 7371 // This is a hack to enable hardcoded mandatory operands which follow 7372 // optional operands. 7373 // 7374 // Current design assumes that all operands after the first optional operand 7375 // are also optional. However implementation of some instructions violates 7376 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7377 // 7378 // To alleviate this problem, we have to (implicitly) parse extra operands 7379 // to make sure autogenerated parser of custom operands never hit hardcoded 7380 // mandatory operands. 
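// (Illustrative sketch: for a flat/global atomic whose AsmString ends with a
// hardcoded " glc", the optional operands preceding it are parsed here, ahead of
// time, so that the auto-generated custom-operand parser never runs into that
// hardcoded mandatory operand.)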
7381 7382 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7383 if (res != MatchOperand_Success || 7384 isToken(AsmToken::EndOfStatement)) 7385 break; 7386 7387 trySkipToken(AsmToken::Comma); 7388 res = parseOptionalOpr(Operands); 7389 } 7390 7391 return res; 7392 } 7393 7394 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7395 OperandMatchResultTy res; 7396 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7397 // try to parse any optional operand here 7398 if (Op.IsBit) { 7399 res = parseNamedBit(Op.Name, Operands, Op.Type); 7400 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7401 res = parseOModOperand(Operands); 7402 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7403 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7404 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7405 res = parseSDWASel(Operands, Op.Name, Op.Type); 7406 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7407 res = parseSDWADstUnused(Operands); 7408 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7409 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7410 Op.Type == AMDGPUOperand::ImmTyNegLo || 7411 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7412 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7413 Op.ConvertResult); 7414 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7415 res = parseDim(Operands); 7416 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7417 res = parseCPol(Operands); 7418 } else { 7419 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7420 } 7421 if (res != MatchOperand_NoMatch) { 7422 return res; 7423 } 7424 } 7425 return MatchOperand_NoMatch; 7426 } 7427 7428 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7429 StringRef Name = getTokenStr(); 7430 if (Name == "mul") { 7431 return parseIntWithPrefix("mul", Operands, 7432 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7433 } 7434 7435 if (Name == "div") { 7436 return parseIntWithPrefix("div", Operands, 7437 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7438 } 7439 7440 return MatchOperand_NoMatch; 7441 } 7442 7443 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7444 cvtVOP3P(Inst, Operands); 7445 7446 int Opc = Inst.getOpcode(); 7447 7448 int SrcNum; 7449 const int Ops[] = { AMDGPU::OpName::src0, 7450 AMDGPU::OpName::src1, 7451 AMDGPU::OpName::src2 }; 7452 for (SrcNum = 0; 7453 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7454 ++SrcNum); 7455 assert(SrcNum > 0); 7456 7457 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7458 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7459 7460 if ((OpSel & (1 << SrcNum)) != 0) { 7461 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7462 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7463 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7464 } 7465 } 7466 7467 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7468 // 1. This operand is input modifiers 7469 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7470 // 2. This is not last operand 7471 && Desc.NumOperands > (OpNum + 1) 7472 // 3. Next operand is register class 7473 && Desc.OpInfo[OpNum + 1].RegClass != -1 7474 // 4. 
Next register is not tied to any other operand 7475 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7476 } 7477 7478 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7479 { 7480 OptionalImmIndexMap OptionalIdx; 7481 unsigned Opc = Inst.getOpcode(); 7482 7483 unsigned I = 1; 7484 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7485 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7486 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7487 } 7488 7489 for (unsigned E = Operands.size(); I != E; ++I) { 7490 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7491 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7492 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7493 } else if (Op.isInterpSlot() || 7494 Op.isInterpAttr() || 7495 Op.isAttrChan()) { 7496 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7497 } else if (Op.isImmModifier()) { 7498 OptionalIdx[Op.getImmTy()] = I; 7499 } else { 7500 llvm_unreachable("unhandled operand type"); 7501 } 7502 } 7503 7504 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7506 } 7507 7508 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7509 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7510 } 7511 7512 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7513 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7514 } 7515 } 7516 7517 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7518 OptionalImmIndexMap &OptionalIdx) { 7519 unsigned Opc = Inst.getOpcode(); 7520 7521 unsigned I = 1; 7522 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7523 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7524 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7525 } 7526 7527 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7528 // This instruction has src modifiers 7529 for (unsigned E = Operands.size(); I != E; ++I) { 7530 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7531 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7532 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7533 } else if (Op.isImmModifier()) { 7534 OptionalIdx[Op.getImmTy()] = I; 7535 } else if (Op.isRegOrImm()) { 7536 Op.addRegOrImmOperands(Inst, 1); 7537 } else { 7538 llvm_unreachable("unhandled operand type"); 7539 } 7540 } 7541 } else { 7542 // No src modifiers 7543 for (unsigned E = Operands.size(); I != E; ++I) { 7544 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7545 if (Op.isMod()) { 7546 OptionalIdx[Op.getImmTy()] = I; 7547 } else { 7548 Op.addRegOrImmOperands(Inst, 1); 7549 } 7550 } 7551 } 7552 7553 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7554 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7555 } 7556 7557 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7558 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7559 } 7560 7561 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7562 // it has src2 register operand that is tied to dst operand 7563 // we don't allow modifiers for this operand in assembler so src2_modifiers 7564 // should be 0. 
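// (For example, a v_mac_f32 written in its VOP3 form with only dst, src0 and
// src1: the tied src2 (equal to the dst) and a zero src2_modifiers immediate are
// synthesized below rather than parsed.)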
7565 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7566 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7567 Opc == AMDGPU::V_MAC_F32_e64_vi || 7568 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7569 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7570 Opc == AMDGPU::V_MAC_F16_e64_vi || 7571 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7572 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7573 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7574 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7575 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7576 auto it = Inst.begin(); 7577 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7578 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7579 ++it; 7580 // Copy the operand to ensure it's not invalidated when Inst grows. 7581 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7582 } 7583 } 7584 7585 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7586 OptionalImmIndexMap OptionalIdx; 7587 cvtVOP3(Inst, Operands, OptionalIdx); 7588 } 7589 7590 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7591 OptionalImmIndexMap &OptIdx) { 7592 const int Opc = Inst.getOpcode(); 7593 const MCInstrDesc &Desc = MII.get(Opc); 7594 7595 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7596 7597 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7598 assert(!IsPacked); 7599 Inst.addOperand(Inst.getOperand(0)); 7600 } 7601 7602 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7603 // instruction, and then figure out where to actually put the modifiers 7604 7605 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7606 if (OpSelIdx != -1) { 7607 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7608 } 7609 7610 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7611 if (OpSelHiIdx != -1) { 7612 int DefaultVal = IsPacked ? 
-1 : 0; 7613 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7614 DefaultVal); 7615 } 7616 7617 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7618 if (NegLoIdx != -1) { 7619 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7620 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7621 } 7622 7623 const int Ops[] = { AMDGPU::OpName::src0, 7624 AMDGPU::OpName::src1, 7625 AMDGPU::OpName::src2 }; 7626 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7627 AMDGPU::OpName::src1_modifiers, 7628 AMDGPU::OpName::src2_modifiers }; 7629 7630 unsigned OpSel = 0; 7631 unsigned OpSelHi = 0; 7632 unsigned NegLo = 0; 7633 unsigned NegHi = 0; 7634 7635 if (OpSelIdx != -1) 7636 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7637 7638 if (OpSelHiIdx != -1) 7639 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7640 7641 if (NegLoIdx != -1) { 7642 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7643 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7644 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7645 } 7646 7647 for (int J = 0; J < 3; ++J) { 7648 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7649 if (OpIdx == -1) 7650 break; 7651 7652 uint32_t ModVal = 0; 7653 7654 if ((OpSel & (1 << J)) != 0) 7655 ModVal |= SISrcMods::OP_SEL_0; 7656 7657 if ((OpSelHi & (1 << J)) != 0) 7658 ModVal |= SISrcMods::OP_SEL_1; 7659 7660 if ((NegLo & (1 << J)) != 0) 7661 ModVal |= SISrcMods::NEG; 7662 7663 if ((NegHi & (1 << J)) != 0) 7664 ModVal |= SISrcMods::NEG_HI; 7665 7666 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7667 7668 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7669 } 7670 } 7671 7672 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7673 OptionalImmIndexMap OptIdx; 7674 cvtVOP3(Inst, Operands, OptIdx); 7675 cvtVOP3P(Inst, Operands, OptIdx); 7676 } 7677 7678 //===----------------------------------------------------------------------===// 7679 // dpp 7680 //===----------------------------------------------------------------------===// 7681 7682 bool AMDGPUOperand::isDPP8() const { 7683 return isImmTy(ImmTyDPP8); 7684 } 7685 7686 bool AMDGPUOperand::isDPPCtrl() const { 7687 using namespace AMDGPU::DPP; 7688 7689 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7690 if (result) { 7691 int64_t Imm = getImm(); 7692 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7693 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7694 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7695 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7696 (Imm == DppCtrl::WAVE_SHL1) || 7697 (Imm == DppCtrl::WAVE_ROL1) || 7698 (Imm == DppCtrl::WAVE_SHR1) || 7699 (Imm == DppCtrl::WAVE_ROR1) || 7700 (Imm == DppCtrl::ROW_MIRROR) || 7701 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7702 (Imm == DppCtrl::BCAST15) || 7703 (Imm == DppCtrl::BCAST31) || 7704 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7705 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7706 } 7707 return false; 7708 } 7709 7710 //===----------------------------------------------------------------------===// 7711 // mAI 7712 //===----------------------------------------------------------------------===// 7713 7714 bool AMDGPUOperand::isBLGP() const { 7715 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7716 } 7717 7718 bool 
AMDGPUOperand::isCBSZ() const { 7719 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7720 } 7721 7722 bool AMDGPUOperand::isABID() const { 7723 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7724 } 7725 7726 bool AMDGPUOperand::isS16Imm() const { 7727 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7728 } 7729 7730 bool AMDGPUOperand::isU16Imm() const { 7731 return isImm() && isUInt<16>(getImm()); 7732 } 7733 7734 //===----------------------------------------------------------------------===// 7735 // dim 7736 //===----------------------------------------------------------------------===// 7737 7738 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7739 // We want to allow "dim:1D" etc., 7740 // but the initial 1 is tokenized as an integer. 7741 std::string Token; 7742 if (isToken(AsmToken::Integer)) { 7743 SMLoc Loc = getToken().getEndLoc(); 7744 Token = std::string(getTokenStr()); 7745 lex(); 7746 if (getLoc() != Loc) 7747 return false; 7748 } 7749 7750 StringRef Suffix; 7751 if (!parseId(Suffix)) 7752 return false; 7753 Token += Suffix; 7754 7755 StringRef DimId = Token; 7756 if (DimId.startswith("SQ_RSRC_IMG_")) 7757 DimId = DimId.drop_front(12); 7758 7759 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7760 if (!DimInfo) 7761 return false; 7762 7763 Encoding = DimInfo->Encoding; 7764 return true; 7765 } 7766 7767 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7768 if (!isGFX10Plus()) 7769 return MatchOperand_NoMatch; 7770 7771 SMLoc S = getLoc(); 7772 7773 if (!trySkipId("dim", AsmToken::Colon)) 7774 return MatchOperand_NoMatch; 7775 7776 unsigned Encoding; 7777 SMLoc Loc = getLoc(); 7778 if (!parseDimId(Encoding)) { 7779 Error(Loc, "invalid dim value"); 7780 return MatchOperand_ParseFail; 7781 } 7782 7783 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7784 AMDGPUOperand::ImmTyDim)); 7785 return MatchOperand_Success; 7786 } 7787 7788 //===----------------------------------------------------------------------===// 7789 // dpp 7790 //===----------------------------------------------------------------------===// 7791 7792 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7793 SMLoc S = getLoc(); 7794 7795 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7796 return MatchOperand_NoMatch; 7797 7798 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7799 7800 int64_t Sels[8]; 7801 7802 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7803 return MatchOperand_ParseFail; 7804 7805 for (size_t i = 0; i < 8; ++i) { 7806 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7807 return MatchOperand_ParseFail; 7808 7809 SMLoc Loc = getLoc(); 7810 if (getParser().parseAbsoluteExpression(Sels[i])) 7811 return MatchOperand_ParseFail; 7812 if (0 > Sels[i] || 7 < Sels[i]) { 7813 Error(Loc, "expected a 3-bit value"); 7814 return MatchOperand_ParseFail; 7815 } 7816 } 7817 7818 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7819 return MatchOperand_ParseFail; 7820 7821 unsigned DPP8 = 0; 7822 for (size_t i = 0; i < 8; ++i) 7823 DPP8 |= (Sels[i] << (i * 3)); 7824 7825 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7826 return MatchOperand_Success; 7827 } 7828 7829 bool 7830 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7831 const OperandVector &Operands) { 7832 if (Ctrl == "row_newbcast") 7833 return isGFX90A(); 7834 7835 if (Ctrl == "row_share" || 7836 Ctrl 
== "row_xmask") 7837 return isGFX10Plus(); 7838 7839 if (Ctrl == "wave_shl" || 7840 Ctrl == "wave_shr" || 7841 Ctrl == "wave_rol" || 7842 Ctrl == "wave_ror" || 7843 Ctrl == "row_bcast") 7844 return isVI() || isGFX9(); 7845 7846 return Ctrl == "row_mirror" || 7847 Ctrl == "row_half_mirror" || 7848 Ctrl == "quad_perm" || 7849 Ctrl == "row_shl" || 7850 Ctrl == "row_shr" || 7851 Ctrl == "row_ror"; 7852 } 7853 7854 int64_t 7855 AMDGPUAsmParser::parseDPPCtrlPerm() { 7856 // quad_perm:[%d,%d,%d,%d] 7857 7858 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7859 return -1; 7860 7861 int64_t Val = 0; 7862 for (int i = 0; i < 4; ++i) { 7863 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7864 return -1; 7865 7866 int64_t Temp; 7867 SMLoc Loc = getLoc(); 7868 if (getParser().parseAbsoluteExpression(Temp)) 7869 return -1; 7870 if (Temp < 0 || Temp > 3) { 7871 Error(Loc, "expected a 2-bit value"); 7872 return -1; 7873 } 7874 7875 Val += (Temp << i * 2); 7876 } 7877 7878 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7879 return -1; 7880 7881 return Val; 7882 } 7883 7884 int64_t 7885 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7886 using namespace AMDGPU::DPP; 7887 7888 // sel:%d 7889 7890 int64_t Val; 7891 SMLoc Loc = getLoc(); 7892 7893 if (getParser().parseAbsoluteExpression(Val)) 7894 return -1; 7895 7896 struct DppCtrlCheck { 7897 int64_t Ctrl; 7898 int Lo; 7899 int Hi; 7900 }; 7901 7902 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7903 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7904 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7905 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7906 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7907 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7908 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7909 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7910 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7911 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7912 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7913 .Default({-1, 0, 0}); 7914 7915 bool Valid; 7916 if (Check.Ctrl == -1) { 7917 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7918 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7919 } else { 7920 Valid = Check.Lo <= Val && Val <= Check.Hi; 7921 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7922 } 7923 7924 if (!Valid) { 7925 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7926 return -1; 7927 } 7928 7929 return Val; 7930 } 7931 7932 OperandMatchResultTy 7933 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7934 using namespace AMDGPU::DPP; 7935 7936 if (!isToken(AsmToken::Identifier) || 7937 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7938 return MatchOperand_NoMatch; 7939 7940 SMLoc S = getLoc(); 7941 int64_t Val = -1; 7942 StringRef Ctrl; 7943 7944 parseId(Ctrl); 7945 7946 if (Ctrl == "row_mirror") { 7947 Val = DppCtrl::ROW_MIRROR; 7948 } else if (Ctrl == "row_half_mirror") { 7949 Val = DppCtrl::ROW_HALF_MIRROR; 7950 } else { 7951 if (skipToken(AsmToken::Colon, "expected a colon")) { 7952 if (Ctrl == "quad_perm") { 7953 Val = parseDPPCtrlPerm(); 7954 } else { 7955 Val = parseDPPCtrlSel(Ctrl); 7956 } 7957 } 7958 } 7959 7960 if (Val == -1) 7961 return MatchOperand_ParseFail; 7962 7963 Operands.push_back( 7964 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7965 return MatchOperand_Success; 7966 } 7967 7968 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7969 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7970 } 7971 7972 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7973 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7974 } 7975 7976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7977 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7978 } 7979 7980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7981 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7982 } 7983 7984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7985 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7986 } 7987 7988 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7989 OptionalImmIndexMap OptionalIdx; 7990 7991 unsigned Opc = Inst.getOpcode(); 7992 bool HasModifiers = 7993 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 7994 unsigned I = 1; 7995 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7996 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7997 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7998 } 7999 8000 int Fi = 0; 8001 for (unsigned E = Operands.size(); I != E; ++I) { 8002 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8003 MCOI::TIED_TO); 8004 if (TiedTo != -1) { 8005 assert((unsigned)TiedTo < Inst.getNumOperands()); 8006 // handle tied old or src2 for MAC instructions 8007 Inst.addOperand(Inst.getOperand(TiedTo)); 8008 } 8009 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8010 // Add the register arguments 8011 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8012 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8013 // Skip it. 
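// (Illustrative example: in "v_add_u32_dpp v1, vcc, v2, v3 row_shl:1" the
// written "vcc" is consumed during parsing but is not added to the MCInst here.)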
8014 continue; 8015 } 8016 8017 if (IsDPP8) { 8018 if (Op.isDPP8()) { 8019 Op.addImmOperands(Inst, 1); 8020 } else if (HasModifiers && 8021 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8022 Op.addRegWithFPInputModsOperands(Inst, 2); 8023 } else if (Op.isFI()) { 8024 Fi = Op.getImm(); 8025 } else if (Op.isReg()) { 8026 Op.addRegOperands(Inst, 1); 8027 } else { 8028 llvm_unreachable("Invalid operand type"); 8029 } 8030 } else { 8031 if (HasModifiers && 8032 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8033 Op.addRegWithFPInputModsOperands(Inst, 2); 8034 } else if (Op.isReg()) { 8035 Op.addRegOperands(Inst, 1); 8036 } else if (Op.isDPPCtrl()) { 8037 Op.addImmOperands(Inst, 1); 8038 } else if (Op.isImm()) { 8039 // Handle optional arguments 8040 OptionalIdx[Op.getImmTy()] = I; 8041 } else { 8042 llvm_unreachable("Invalid operand type"); 8043 } 8044 } 8045 } 8046 8047 if (IsDPP8) { 8048 using namespace llvm::AMDGPU::DPP; 8049 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8050 } else { 8051 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8053 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8054 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8055 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8056 } 8057 } 8058 } 8059 8060 //===----------------------------------------------------------------------===// 8061 // sdwa 8062 //===----------------------------------------------------------------------===// 8063 8064 OperandMatchResultTy 8065 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8066 AMDGPUOperand::ImmTy Type) { 8067 using namespace llvm::AMDGPU::SDWA; 8068 8069 SMLoc S = getLoc(); 8070 StringRef Value; 8071 OperandMatchResultTy res; 8072 8073 SMLoc StringLoc; 8074 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8075 if (res != MatchOperand_Success) { 8076 return res; 8077 } 8078 8079 int64_t Int; 8080 Int = StringSwitch<int64_t>(Value) 8081 .Case("BYTE_0", SdwaSel::BYTE_0) 8082 .Case("BYTE_1", SdwaSel::BYTE_1) 8083 .Case("BYTE_2", SdwaSel::BYTE_2) 8084 .Case("BYTE_3", SdwaSel::BYTE_3) 8085 .Case("WORD_0", SdwaSel::WORD_0) 8086 .Case("WORD_1", SdwaSel::WORD_1) 8087 .Case("DWORD", SdwaSel::DWORD) 8088 .Default(0xffffffff); 8089 8090 if (Int == 0xffffffff) { 8091 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8092 return MatchOperand_ParseFail; 8093 } 8094 8095 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8096 return MatchOperand_Success; 8097 } 8098 8099 OperandMatchResultTy 8100 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8101 using namespace llvm::AMDGPU::SDWA; 8102 8103 SMLoc S = getLoc(); 8104 StringRef Value; 8105 OperandMatchResultTy res; 8106 8107 SMLoc StringLoc; 8108 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8109 if (res != MatchOperand_Success) { 8110 return res; 8111 } 8112 8113 int64_t Int; 8114 Int = StringSwitch<int64_t>(Value) 8115 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8116 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8117 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8118 .Default(0xffffffff); 8119 8120 if (Int == 0xffffffff) { 8121 Error(StringLoc, "invalid dst_unused value"); 8122 return MatchOperand_ParseFail; 8123 } 8124 8125 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8126 return MatchOperand_Success; 8127 } 8128 8129 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8130 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8131 } 8132 8133 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8134 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8135 } 8136 8137 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8138 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8139 } 8140 8141 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8142 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8143 } 8144 8145 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8146 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8147 } 8148 8149 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8150 uint64_t BasicInstType, 8151 bool SkipDstVcc, 8152 bool SkipSrcVcc) { 8153 using namespace llvm::AMDGPU::SDWA; 8154 8155 OptionalImmIndexMap OptionalIdx; 8156 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8157 bool SkippedVcc = false; 8158 8159 unsigned I = 1; 8160 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8161 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8162 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8163 } 8164 8165 for (unsigned E = Operands.size(); I != E; ++I) { 8166 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8167 if (SkipVcc && !SkippedVcc && Op.isReg() && 8168 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8169 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8170 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8171 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8172 // Skip VCC only if we didn't skip it on previous iteration. 8173 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
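// (In terms of MCInst operands already added, the checks below distinguish:
// getNumOperands() == 1 - only the dst is present, so this "vcc" is the written
// dst vcc; getNumOperands() == 5 - dst plus src0 and src1 (each modifiers +
// register) are present, so this "vcc" is the trailing src vcc.)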
8174 if (BasicInstType == SIInstrFlags::VOP2 &&
8175 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8176 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8177 SkippedVcc = true;
8178 continue;
8179 } else if (BasicInstType == SIInstrFlags::VOPC &&
8180 Inst.getNumOperands() == 0) {
8181 SkippedVcc = true;
8182 continue;
8183 }
8184 }
8185 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8186 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8187 } else if (Op.isImm()) {
8188 // Handle optional arguments
8189 OptionalIdx[Op.getImmTy()] = I;
8190 } else {
8191 llvm_unreachable("Invalid operand type");
8192 }
8193 SkippedVcc = false;
8194 }
8195
8196 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8197 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8198 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8199 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8200 switch (BasicInstType) {
8201 case SIInstrFlags::VOP1:
8202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8203 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8205 }
8206 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8207 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8208 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8209 break;
8210
8211 case SIInstrFlags::VOP2:
8212 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8213 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8214 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8215 }
8216 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8217 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8219 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8220 break;
8221
8222 case SIInstrFlags::VOPC:
8223 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8224 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8225 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8227 break;
8228
8229 default:
8230 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed");
8231 }
8232 }
8233
8234 // Special case v_mac_{f16, f32}:
8235 // they have a src2 register operand that is tied to the dst operand.
8236 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8237 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8238 auto it = Inst.begin();
8239 std::advance(
8240 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8241 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8242 }
8243 }
8244
8245 //===----------------------------------------------------------------------===//
8246 // mAI
8247 //===----------------------------------------------------------------------===//
8248
8249 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8250 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8251 }
8252
8253 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8254 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8255 }
8256
8257 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8258 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8259 }
8260
8261 /// Force static initialization.
8262 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8263 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8264 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8265 }
8266
8267 #define GET_REGISTER_MATCHER
8268 #define GET_MATCHER_IMPLEMENTATION
8269 #define GET_MNEMONIC_SPELL_CHECKER
8270 #define GET_MNEMONIC_CHECKER
8271 #include "AMDGPUGenAsmMatcher.inc"
8272
8273 // This function should be defined after the auto-generated include so that we
8274 // have the MatchClassKind enum defined.
8275 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8276 unsigned Kind) {
8277 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8278 // But MatchInstructionImpl() expects to meet a token and fails to validate the
8279 // operand. This method checks whether we were given an immediate operand but
8280 // expected to get the corresponding token.
8281 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8282 switch (Kind) {
8283 case MCK_addr64:
8284 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8285 case MCK_gds:
8286 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8287 case MCK_lds:
8288 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8289 case MCK_idxen:
8290 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8291 case MCK_offen:
8292 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8293 case MCK_SSrcB32:
8294 // When operands have expression values, they will return true for isToken,
8295 // because it is not possible to distinguish between a token and an
8296 // expression at parse time. MatchInstructionImpl() will always try to
8297 // match an operand as a token when isToken returns true, and when the
8298 // name of the expression is not a valid token, the match will fail,
8299 // so we need to handle it here.
8300 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8301 case MCK_SSrcF32:
8302 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8303 case MCK_SoppBrTarget:
8304 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8305 case MCK_VReg32OrOff:
8306 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8307 case MCK_InterpSlot:
8308 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8309 case MCK_Attr: 8310 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8311 case MCK_AttrChan: 8312 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8313 case MCK_ImmSMEMOffset: 8314 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8315 case MCK_SReg_64: 8316 case MCK_SReg_64_XEXEC: 8317 // Null is defined as a 32-bit register but 8318 // it should also be enabled with 64-bit operands. 8319 // The following code enables it for SReg_64 operands 8320 // used as source and destination. Remaining source 8321 // operands are handled in isInlinableImm. 8322 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8323 default: 8324 return Match_InvalidOperand; 8325 } 8326 } 8327 8328 //===----------------------------------------------------------------------===// 8329 // endpgm 8330 //===----------------------------------------------------------------------===// 8331 8332 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8333 SMLoc S = getLoc(); 8334 int64_t Imm = 0; 8335 8336 if (!parseExpr(Imm)) { 8337 // The operand is optional, if not present default to 0 8338 Imm = 0; 8339 } 8340 8341 if (!isUInt<16>(Imm)) { 8342 Error(S, "expected a 16-bit value"); 8343 return MatchOperand_ParseFail; 8344 } 8345 8346 Operands.push_back( 8347 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8348 return MatchOperand_Success; 8349 } 8350 8351 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8352
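// (Usage note, illustrative: both "s_endpgm" with no operand and "s_endpgm 3"
// reach parseEndpgmOp(); an omitted immediate defaults to 0, and the value must
// fit in 16 bits.)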