1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/TargetParser.h" 40 41 using namespace llvm; 42 using namespace llvm::AMDGPU; 43 using namespace llvm::amdhsa; 44 45 namespace { 46 47 class AMDGPUAsmParser; 48 49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 50 51 //===----------------------------------------------------------------------===// 52 // Operand 53 //===----------------------------------------------------------------------===// 54 55 class AMDGPUOperand : public MCParsedAsmOperand { 56 enum KindTy { 57 Token, 58 Immediate, 59 Register, 60 Expression 61 } Kind; 62 63 SMLoc StartLoc, EndLoc; 64 const AMDGPUAsmParser *AsmParser; 65 66 public: 67 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 68 : Kind(Kind_), AsmParser(AsmParser_) {} 69 70 using Ptr = std::unique_ptr<AMDGPUOperand>; 71 72 struct Modifiers { 73 bool Abs = false; 74 bool Neg = false; 75 bool Sext = false; 76 77 bool hasFPModifiers() const { return Abs || Neg; } 78 bool hasIntModifiers() const { return Sext; } 79 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 80 81 int64_t getFPModifiersOperand() const { 82 int64_t Operand = 0; 83 Operand |= Abs ? SISrcMods::ABS : 0u; 84 Operand |= Neg ? SISrcMods::NEG : 0u; 85 return Operand; 86 } 87 88 int64_t getIntModifiersOperand() const { 89 int64_t Operand = 0; 90 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 91 return Operand; 92 } 93 94 int64_t getModifiersOperand() const { 95 assert(!(hasFPModifiers() && hasIntModifiers()) 96 && "fp and int modifiers should not be used simultaneously"); 97 if (hasFPModifiers()) { 98 return getFPModifiersOperand(); 99 } else if (hasIntModifiers()) { 100 return getIntModifiersOperand(); 101 } else { 102 return 0; 103 } 104 } 105 106 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 107 }; 108 109 enum ImmTy { 110 ImmTyNone, 111 ImmTyGDS, 112 ImmTyLDS, 113 ImmTyOffen, 114 ImmTyIdxen, 115 ImmTyAddr64, 116 ImmTyOffset, 117 ImmTyInstOffset, 118 ImmTyOffset0, 119 ImmTyOffset1, 120 ImmTyCPol, 121 ImmTySWZ, 122 ImmTyTFE, 123 ImmTyD16, 124 ImmTyClampSI, 125 ImmTyOModSI, 126 ImmTyDPP8, 127 ImmTyDppCtrl, 128 ImmTyDppRowMask, 129 ImmTyDppBankMask, 130 ImmTyDppBoundCtrl, 131 ImmTyDppFi, 132 ImmTySdwaDstSel, 133 ImmTySdwaSrc0Sel, 134 ImmTySdwaSrc1Sel, 135 ImmTySdwaDstUnused, 136 ImmTyDMask, 137 ImmTyDim, 138 ImmTyUNorm, 139 ImmTyDA, 140 ImmTyR128A16, 141 ImmTyA16, 142 ImmTyLWE, 143 ImmTyExpTgt, 144 ImmTyExpCompr, 145 ImmTyExpVM, 146 ImmTyFORMAT, 147 ImmTyHwreg, 148 ImmTyOff, 149 ImmTySendMsg, 150 ImmTyInterpSlot, 151 ImmTyInterpAttr, 152 ImmTyAttrChan, 153 ImmTyOpSel, 154 ImmTyOpSelHi, 155 ImmTyNegLo, 156 ImmTyNegHi, 157 ImmTySwizzle, 158 ImmTyGprIdxMode, 159 ImmTyHigh, 160 ImmTyBLGP, 161 ImmTyCBSZ, 162 ImmTyABID, 163 ImmTyEndpgm, 164 ImmTyWaitVDST, 165 }; 166 167 enum ImmKindTy { 168 ImmKindTyNone, 169 ImmKindTyLiteral, 170 ImmKindTyConst, 171 }; 172 173 private: 174 struct TokOp { 175 const char *Data; 176 unsigned Length; 177 }; 178 179 struct ImmOp { 180 int64_t Val; 181 ImmTy Type; 182 bool IsFPImm; 183 mutable ImmKindTy Kind; 184 Modifiers Mods; 185 }; 186 187 struct RegOp { 188 unsigned RegNo; 189 Modifiers Mods; 190 }; 191 192 union { 193 TokOp Tok; 194 ImmOp Imm; 195 RegOp Reg; 196 const MCExpr *Expr; 197 }; 198 199 public: 200 bool isToken() const override { 201 if (Kind == Token) 202 return true; 203 204 // When parsing operands, we can't always tell if something was meant to be 205 // a token, like 'gds', or an expression that references a global variable. 206 // In this case, we assume the string is an expression, and if we need to 207 // interpret is a token, then we treat the symbol name as the token. 
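    // For example, a trailing "gds" modifier may have been lexed as an
    // identifier and wrapped in an MCSymbolRefExpr; answering "token" here and
    // exposing the symbol's name via getToken() lets the matcher still treat
    // it as the named modifier. (Illustrative scenario, not an exhaustive list.)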
208 return isSymbolRefExpr(); 209 } 210 211 bool isSymbolRefExpr() const { 212 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 213 } 214 215 bool isImm() const override { 216 return Kind == Immediate; 217 } 218 219 void setImmKindNone() const { 220 assert(isImm()); 221 Imm.Kind = ImmKindTyNone; 222 } 223 224 void setImmKindLiteral() const { 225 assert(isImm()); 226 Imm.Kind = ImmKindTyLiteral; 227 } 228 229 void setImmKindConst() const { 230 assert(isImm()); 231 Imm.Kind = ImmKindTyConst; 232 } 233 234 bool IsImmKindLiteral() const { 235 return isImm() && Imm.Kind == ImmKindTyLiteral; 236 } 237 238 bool isImmKindConst() const { 239 return isImm() && Imm.Kind == ImmKindTyConst; 240 } 241 242 bool isInlinableImm(MVT type) const; 243 bool isLiteralImm(MVT type) const; 244 245 bool isRegKind() const { 246 return Kind == Register; 247 } 248 249 bool isReg() const override { 250 return isRegKind() && !hasModifiers(); 251 } 252 253 bool isRegOrInline(unsigned RCID, MVT type) const { 254 return isRegClass(RCID) || isInlinableImm(type); 255 } 256 257 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 258 return isRegOrInline(RCID, type) || isLiteralImm(type); 259 } 260 261 bool isRegOrImmWithInt16InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 263 } 264 265 bool isRegOrImmWithInt32InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 267 } 268 269 bool isRegOrImmWithInt64InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 271 } 272 273 bool isRegOrImmWithFP16InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 275 } 276 277 bool isRegOrImmWithFP32InputMods() const { 278 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 279 } 280 281 bool isRegOrImmWithFP64InputMods() const { 282 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 283 } 284 285 bool isVReg() const { 286 return isRegClass(AMDGPU::VGPR_32RegClassID) || 287 isRegClass(AMDGPU::VReg_64RegClassID) || 288 isRegClass(AMDGPU::VReg_96RegClassID) || 289 isRegClass(AMDGPU::VReg_128RegClassID) || 290 isRegClass(AMDGPU::VReg_160RegClassID) || 291 isRegClass(AMDGPU::VReg_192RegClassID) || 292 isRegClass(AMDGPU::VReg_256RegClassID) || 293 isRegClass(AMDGPU::VReg_512RegClassID) || 294 isRegClass(AMDGPU::VReg_1024RegClassID); 295 } 296 297 bool isVReg32() const { 298 return isRegClass(AMDGPU::VGPR_32RegClassID); 299 } 300 301 bool isVReg32OrOff() const { 302 return isOff() || isVReg32(); 303 } 304 305 bool isNull() const { 306 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 307 } 308 309 bool isVRegWithInputMods() const; 310 311 bool isSDWAOperand(MVT type) const; 312 bool isSDWAFP16Operand() const; 313 bool isSDWAFP32Operand() const; 314 bool isSDWAInt16Operand() const; 315 bool isSDWAInt32Operand() const; 316 317 bool isImmTy(ImmTy ImmT) const { 318 return isImm() && Imm.Type == ImmT; 319 } 320 321 bool isImmModifier() const { 322 return isImm() && Imm.Type != ImmTyNone; 323 } 324 325 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 326 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 327 bool isDMask() const { return isImmTy(ImmTyDMask); } 328 bool isDim() const { return isImmTy(ImmTyDim); } 329 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 330 bool isDA() const { return isImmTy(ImmTyDA); } 331 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 332 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 333 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 334 bool isOff() const { return isImmTy(ImmTyOff); } 335 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 336 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 337 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 338 bool isOffen() const { return isImmTy(ImmTyOffen); } 339 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 340 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 341 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 342 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 343 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 344 345 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 346 bool isGDS() const { return isImmTy(ImmTyGDS); } 347 bool isLDS() const { return isImmTy(ImmTyLDS); } 348 bool isCPol() const { return isImmTy(ImmTyCPol); } 349 bool isSWZ() const { return isImmTy(ImmTySWZ); } 350 bool isTFE() const { return isImmTy(ImmTyTFE); } 351 bool isD16() const { return isImmTy(ImmTyD16); } 352 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 353 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 354 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 355 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 356 bool isFI() const { return isImmTy(ImmTyDppFi); } 357 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 358 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 359 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 360 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 361 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 362 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 363 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 364 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 365 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 366 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 367 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 368 bool isHigh() const { return isImmTy(ImmTyHigh); } 369 370 bool isMod() const { 371 return isClampSI() || isOModSI(); 372 } 373 374 bool isRegOrImm() const { 375 return isReg() || isImm(); 376 } 377 378 bool isRegClass(unsigned RCID) const; 379 380 bool isInlineValue() const; 381 382 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 383 return isRegOrInline(RCID, type) && !hasModifiers(); 384 } 385 386 bool isSCSrcB16() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 388 } 389 390 bool isSCSrcV2B16() const { 391 return isSCSrcB16(); 392 } 393 394 bool isSCSrcB32() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 396 } 397 398 bool isSCSrcB64() const { 399 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 400 } 401 402 bool isBoolReg() const; 403 404 bool isSCSrcF16() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 406 } 407 408 bool isSCSrcV2F16() const { 409 return isSCSrcF16(); 410 } 411 412 bool isSCSrcF32() const { 413 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 414 } 415 416 bool isSCSrcF64() const { 417 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 418 } 419 420 bool isSSrcB32() const { 421 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 422 } 423 424 bool isSSrcB16() const { 425 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 426 } 427 428 bool isSSrcV2B16() const { 429 llvm_unreachable("cannot happen"); 430 return isSSrcB16(); 431 } 432 433 bool isSSrcB64() const { 434 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 435 // See isVSrc64(). 436 return isSCSrcB64() || isLiteralImm(MVT::i64); 437 } 438 439 bool isSSrcF32() const { 440 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 441 } 442 443 bool isSSrcF64() const { 444 return isSCSrcB64() || isLiteralImm(MVT::f64); 445 } 446 447 bool isSSrcF16() const { 448 return isSCSrcB16() || isLiteralImm(MVT::f16); 449 } 450 451 bool isSSrcV2F16() const { 452 llvm_unreachable("cannot happen"); 453 return isSSrcF16(); 454 } 455 456 bool isSSrcV2FP32() const { 457 llvm_unreachable("cannot happen"); 458 return isSSrcF32(); 459 } 460 461 bool isSCSrcV2FP32() const { 462 llvm_unreachable("cannot happen"); 463 return isSCSrcF32(); 464 } 465 466 bool isSSrcV2INT32() const { 467 llvm_unreachable("cannot happen"); 468 return isSSrcB32(); 469 } 470 471 bool isSCSrcV2INT32() const { 472 llvm_unreachable("cannot happen"); 473 return isSCSrcB32(); 474 } 475 476 bool isSSrcOrLdsB32() const { 477 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 478 isLiteralImm(MVT::i32) || isExpr(); 479 } 480 481 bool isVCSrcB32() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 483 } 484 485 bool isVCSrcB64() const { 486 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 487 } 488 489 bool isVCSrcB16() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 491 } 492 493 bool isVCSrcV2B16() const { 494 return isVCSrcB16(); 495 } 496 497 bool isVCSrcF32() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 499 } 500 501 bool isVCSrcF64() const { 502 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 503 } 504 505 bool isVCSrcF16() const { 506 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 507 } 508 509 bool isVCSrcV2F16() const { 510 return isVCSrcF16(); 511 } 512 513 bool isVSrcB32() const { 514 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 515 } 516 517 bool isVSrcB64() const { 518 return isVCSrcF64() || isLiteralImm(MVT::i64); 519 } 520 521 bool isVSrcB16() const { 522 return isVCSrcB16() || isLiteralImm(MVT::i16); 523 } 524 525 bool isVSrcV2B16() const { 526 return isVSrcB16() || isLiteralImm(MVT::v2i16); 527 } 528 529 bool isVCSrcV2FP32() const { 530 return isVCSrcF64(); 531 } 532 533 bool isVSrcV2FP32() const { 534 return isVSrcF64() || isLiteralImm(MVT::v2f32); 535 } 536 537 bool isVCSrcV2INT32() const { 538 return isVCSrcB64(); 539 } 540 541 bool isVSrcV2INT32() const { 542 return isVSrcB64() || isLiteralImm(MVT::v2i32); 543 } 544 545 bool isVSrcF32() const { 546 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 547 } 548 549 bool isVSrcF64() const { 550 return isVCSrcF64() || isLiteralImm(MVT::f64); 551 } 552 553 bool isVSrcF16() const { 554 return isVCSrcF16() || isLiteralImm(MVT::f16); 555 } 556 557 bool isVSrcV2F16() const { 558 return isVSrcF16() || isLiteralImm(MVT::v2f16); 559 } 560 561 bool isVISrcB32() const { 562 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 563 } 564 565 bool isVISrcB16() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 567 } 568 569 bool isVISrcV2B16() const { 570 return isVISrcB16(); 571 } 572 573 bool isVISrcF32() const { 574 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 575 } 576 577 
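  // Informal naming key for the is*Src* predicates in this class (a reading
  // aid, not part of the generated matcher):
  //   SCSrc* - SGPR or inline constant;        SSrc*  - SCSrc or a literal/expr
  //   VCSrc* - VGPR/SGPR or inline constant;   VSrc*  - VCSrc or a literal
  //   VISrc* - VGPR-only or inline constant;   AISrc* - AGPR-only or inline constant
  // The B16/B32/B64, F16/F32/F64 and V2* suffixes give the value type used when
  // checking whether an immediate is inlinable.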
bool isVISrcF16() const { 578 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 579 } 580 581 bool isVISrcV2F16() const { 582 return isVISrcF16() || isVISrcB32(); 583 } 584 585 bool isVISrc_64B64() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 587 } 588 589 bool isVISrc_64F64() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 591 } 592 593 bool isVISrc_64V2FP32() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 595 } 596 597 bool isVISrc_64V2INT32() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 599 } 600 601 bool isVISrc_256B64() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 603 } 604 605 bool isVISrc_256F64() const { 606 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 607 } 608 609 bool isVISrc_128B16() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 611 } 612 613 bool isVISrc_128V2B16() const { 614 return isVISrc_128B16(); 615 } 616 617 bool isVISrc_128B32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 619 } 620 621 bool isVISrc_128F32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 623 } 624 625 bool isVISrc_256V2FP32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 627 } 628 629 bool isVISrc_256V2INT32() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 631 } 632 633 bool isVISrc_512B32() const { 634 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 635 } 636 637 bool isVISrc_512B16() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 639 } 640 641 bool isVISrc_512V2B16() const { 642 return isVISrc_512B16(); 643 } 644 645 bool isVISrc_512F32() const { 646 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 647 } 648 649 bool isVISrc_512F16() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 651 } 652 653 bool isVISrc_512V2F16() const { 654 return isVISrc_512F16() || isVISrc_512B32(); 655 } 656 657 bool isVISrc_1024B32() const { 658 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 659 } 660 661 bool isVISrc_1024B16() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 663 } 664 665 bool isVISrc_1024V2B16() const { 666 return isVISrc_1024B16(); 667 } 668 669 bool isVISrc_1024F32() const { 670 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 671 } 672 673 bool isVISrc_1024F16() const { 674 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 675 } 676 677 bool isVISrc_1024V2F16() const { 678 return isVISrc_1024F16() || isVISrc_1024B32(); 679 } 680 681 bool isAISrcB32() const { 682 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 683 } 684 685 bool isAISrcB16() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 687 } 688 689 bool isAISrcV2B16() const { 690 return isAISrcB16(); 691 } 692 693 bool isAISrcF32() const { 694 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 695 } 696 697 bool isAISrcF16() const { 698 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 699 } 700 701 bool isAISrcV2F16() const { 702 return isAISrcF16() || isAISrcB32(); 703 } 704 705 bool isAISrc_64B64() const { 706 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 707 } 708 709 bool isAISrc_64F64() const { 710 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 711 } 712 713 bool isAISrc_128B32() const { 714 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 715 } 716 717 bool isAISrc_128B16() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 719 } 720 721 bool isAISrc_128V2B16() const { 722 return isAISrc_128B16(); 723 } 724 725 bool isAISrc_128F32() const { 726 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 727 } 728 729 bool isAISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 731 } 732 733 bool isAISrc_128V2F16() const { 734 return isAISrc_128F16() || isAISrc_128B32(); 735 } 736 737 bool isVISrc_128F16() const { 738 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 739 } 740 741 bool isVISrc_128V2F16() const { 742 return isVISrc_128F16() || isVISrc_128B32(); 743 } 744 745 bool isAISrc_256B64() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 747 } 748 749 bool isAISrc_256F64() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 751 } 752 753 bool isAISrc_512B32() const { 754 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 755 } 756 757 bool isAISrc_512B16() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 759 } 760 761 bool isAISrc_512V2B16() const { 762 return isAISrc_512B16(); 763 } 764 765 bool isAISrc_512F32() const { 766 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 767 } 768 769 bool isAISrc_512F16() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 771 } 772 773 bool isAISrc_512V2F16() const { 774 return isAISrc_512F16() || isAISrc_512B32(); 775 } 776 777 bool isAISrc_1024B32() const { 778 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 779 } 780 781 bool isAISrc_1024B16() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 783 } 784 785 bool isAISrc_1024V2B16() const { 786 return isAISrc_1024B16(); 787 } 788 789 bool isAISrc_1024F32() const { 790 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 791 } 792 793 bool isAISrc_1024F16() const { 794 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 795 } 796 797 bool isAISrc_1024V2F16() const { 798 return isAISrc_1024F16() || isAISrc_1024B32(); 799 } 800 801 bool isKImmFP32() const { 802 return isLiteralImm(MVT::f32); 803 } 804 805 bool isKImmFP16() const { 806 return isLiteralImm(MVT::f16); 807 } 808 809 bool isMem() const override { 810 return false; 811 } 812 813 bool isExpr() const { 814 return Kind == Expression; 815 } 816 817 bool isSoppBrTarget() const { 818 return isExpr() || isImm(); 819 } 820 821 bool isSWaitCnt() const; 822 bool isDepCtr() const; 823 bool isSDelayAlu() const; 824 bool isHwreg() const; 825 bool isSendMsg() const; 826 bool isSwizzle() const; 827 bool isSMRDOffset8() const; 828 bool isSMEMOffset() const; 829 bool isSMRDLiteralOffset() const; 830 bool isDPP8() const; 831 bool isDPPCtrl() const; 832 bool isBLGP() const; 833 bool isCBSZ() const; 834 bool isABID() const; 835 bool isGPRIdxMode() const; 836 bool isS16Imm() const; 837 bool isU16Imm() const; 838 bool isEndpgm() const; 839 bool isWaitVDST() const; 840 841 StringRef getExpressionAsToken() const { 842 assert(isExpr()); 843 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 844 return S->getSymbol().getName(); 845 } 846 847 StringRef getToken() const { 848 assert(isToken()); 849 850 if (Kind == Expression) 
851 return getExpressionAsToken(); 852 853 return StringRef(Tok.Data, Tok.Length); 854 } 855 856 int64_t getImm() const { 857 assert(isImm()); 858 return Imm.Val; 859 } 860 861 void setImm(int64_t Val) { 862 assert(isImm()); 863 Imm.Val = Val; 864 } 865 866 ImmTy getImmTy() const { 867 assert(isImm()); 868 return Imm.Type; 869 } 870 871 unsigned getReg() const override { 872 assert(isRegKind()); 873 return Reg.RegNo; 874 } 875 876 SMLoc getStartLoc() const override { 877 return StartLoc; 878 } 879 880 SMLoc getEndLoc() const override { 881 return EndLoc; 882 } 883 884 SMRange getLocRange() const { 885 return SMRange(StartLoc, EndLoc); 886 } 887 888 Modifiers getModifiers() const { 889 assert(isRegKind() || isImmTy(ImmTyNone)); 890 return isRegKind() ? Reg.Mods : Imm.Mods; 891 } 892 893 void setModifiers(Modifiers Mods) { 894 assert(isRegKind() || isImmTy(ImmTyNone)); 895 if (isRegKind()) 896 Reg.Mods = Mods; 897 else 898 Imm.Mods = Mods; 899 } 900 901 bool hasModifiers() const { 902 return getModifiers().hasModifiers(); 903 } 904 905 bool hasFPModifiers() const { 906 return getModifiers().hasFPModifiers(); 907 } 908 909 bool hasIntModifiers() const { 910 return getModifiers().hasIntModifiers(); 911 } 912 913 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 914 915 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 916 917 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 918 919 template <unsigned Bitwidth> 920 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 921 922 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 923 addKImmFPOperands<16>(Inst, N); 924 } 925 926 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 927 addKImmFPOperands<32>(Inst, N); 928 } 929 930 void addRegOperands(MCInst &Inst, unsigned N) const; 931 932 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 933 addRegOperands(Inst, N); 934 } 935 936 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 937 if (isRegKind()) 938 addRegOperands(Inst, N); 939 else if (isExpr()) 940 Inst.addOperand(MCOperand::createExpr(Expr)); 941 else 942 addImmOperands(Inst, N); 943 } 944 945 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 946 Modifiers Mods = getModifiers(); 947 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 948 if (isRegKind()) { 949 addRegOperands(Inst, N); 950 } else { 951 addImmOperands(Inst, N, false); 952 } 953 } 954 955 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 956 assert(!hasIntModifiers()); 957 addRegOrImmWithInputModsOperands(Inst, N); 958 } 959 960 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 961 assert(!hasFPModifiers()); 962 addRegOrImmWithInputModsOperands(Inst, N); 963 } 964 965 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 966 Modifiers Mods = getModifiers(); 967 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 968 assert(isRegKind()); 969 addRegOperands(Inst, N); 970 } 971 972 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 973 assert(!hasIntModifiers()); 974 addRegWithInputModsOperands(Inst, N); 975 } 976 977 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 978 assert(!hasFPModifiers()); 979 addRegWithInputModsOperands(Inst, N); 980 } 981 982 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 983 if (isImm()) 984 addImmOperands(Inst, N); 985 else { 986 assert(isExpr()); 987 
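      // Branch targets that are still symbolic at this point (e.g. a forward
      // reference to a label) are emitted as MCExpr operands and resolved
      // later by the MC layer as a fixup.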
Inst.addOperand(MCOperand::createExpr(Expr)); 988 } 989 } 990 991 static void printImmTy(raw_ostream& OS, ImmTy Type) { 992 switch (Type) { 993 case ImmTyNone: OS << "None"; break; 994 case ImmTyGDS: OS << "GDS"; break; 995 case ImmTyLDS: OS << "LDS"; break; 996 case ImmTyOffen: OS << "Offen"; break; 997 case ImmTyIdxen: OS << "Idxen"; break; 998 case ImmTyAddr64: OS << "Addr64"; break; 999 case ImmTyOffset: OS << "Offset"; break; 1000 case ImmTyInstOffset: OS << "InstOffset"; break; 1001 case ImmTyOffset0: OS << "Offset0"; break; 1002 case ImmTyOffset1: OS << "Offset1"; break; 1003 case ImmTyCPol: OS << "CPol"; break; 1004 case ImmTySWZ: OS << "SWZ"; break; 1005 case ImmTyTFE: OS << "TFE"; break; 1006 case ImmTyD16: OS << "D16"; break; 1007 case ImmTyFORMAT: OS << "FORMAT"; break; 1008 case ImmTyClampSI: OS << "ClampSI"; break; 1009 case ImmTyOModSI: OS << "OModSI"; break; 1010 case ImmTyDPP8: OS << "DPP8"; break; 1011 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1012 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1013 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1014 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1015 case ImmTyDppFi: OS << "FI"; break; 1016 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1017 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1018 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1019 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1020 case ImmTyDMask: OS << "DMask"; break; 1021 case ImmTyDim: OS << "Dim"; break; 1022 case ImmTyUNorm: OS << "UNorm"; break; 1023 case ImmTyDA: OS << "DA"; break; 1024 case ImmTyR128A16: OS << "R128A16"; break; 1025 case ImmTyA16: OS << "A16"; break; 1026 case ImmTyLWE: OS << "LWE"; break; 1027 case ImmTyOff: OS << "Off"; break; 1028 case ImmTyExpTgt: OS << "ExpTgt"; break; 1029 case ImmTyExpCompr: OS << "ExpCompr"; break; 1030 case ImmTyExpVM: OS << "ExpVM"; break; 1031 case ImmTyHwreg: OS << "Hwreg"; break; 1032 case ImmTySendMsg: OS << "SendMsg"; break; 1033 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1034 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1035 case ImmTyAttrChan: OS << "AttrChan"; break; 1036 case ImmTyOpSel: OS << "OpSel"; break; 1037 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1038 case ImmTyNegLo: OS << "NegLo"; break; 1039 case ImmTyNegHi: OS << "NegHi"; break; 1040 case ImmTySwizzle: OS << "Swizzle"; break; 1041 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1042 case ImmTyHigh: OS << "High"; break; 1043 case ImmTyBLGP: OS << "BLGP"; break; 1044 case ImmTyCBSZ: OS << "CBSZ"; break; 1045 case ImmTyABID: OS << "ABID"; break; 1046 case ImmTyEndpgm: OS << "Endpgm"; break; 1047 case ImmTyWaitVDST: OS << "WaitVDST"; break; 1048 } 1049 } 1050 1051 void print(raw_ostream &OS) const override { 1052 switch (Kind) { 1053 case Register: 1054 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1055 break; 1056 case Immediate: 1057 OS << '<' << getImm(); 1058 if (getImmTy() != ImmTyNone) { 1059 OS << " type: "; printImmTy(OS, getImmTy()); 1060 } 1061 OS << " mods: " << Imm.Mods << '>'; 1062 break; 1063 case Token: 1064 OS << '\'' << getToken() << '\''; 1065 break; 1066 case Expression: 1067 OS << "<expr " << *Expr << '>'; 1068 break; 1069 } 1070 } 1071 1072 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1073 int64_t Val, SMLoc Loc, 1074 ImmTy Type = ImmTyNone, 1075 bool IsFPImm = false) { 1076 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1077 Op->Imm.Val = Val; 1078 Op->Imm.IsFPImm = IsFPImm; 1079 Op->Imm.Kind = ImmKindTyNone; 1080 
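    // Imm.Kind starts out as "none"; it is promoted to ImmKindTyConst or
    // ImmKindTyLiteral later, when the operand is rendered into an MCInst
    // (see addImmOperands / addLiteralImmOperand).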
Op->Imm.Type = Type; 1081 Op->Imm.Mods = Modifiers(); 1082 Op->StartLoc = Loc; 1083 Op->EndLoc = Loc; 1084 return Op; 1085 } 1086 1087 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1088 StringRef Str, SMLoc Loc, 1089 bool HasExplicitEncodingSize = true) { 1090 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1091 Res->Tok.Data = Str.data(); 1092 Res->Tok.Length = Str.size(); 1093 Res->StartLoc = Loc; 1094 Res->EndLoc = Loc; 1095 return Res; 1096 } 1097 1098 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1099 unsigned RegNo, SMLoc S, 1100 SMLoc E) { 1101 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1102 Op->Reg.RegNo = RegNo; 1103 Op->Reg.Mods = Modifiers(); 1104 Op->StartLoc = S; 1105 Op->EndLoc = E; 1106 return Op; 1107 } 1108 1109 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1110 const class MCExpr *Expr, SMLoc S) { 1111 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1112 Op->Expr = Expr; 1113 Op->StartLoc = S; 1114 Op->EndLoc = S; 1115 return Op; 1116 } 1117 }; 1118 1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1120 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1121 return OS; 1122 } 1123 1124 //===----------------------------------------------------------------------===// 1125 // AsmParser 1126 //===----------------------------------------------------------------------===// 1127 1128 // Holds info related to the current kernel, e.g. count of SGPRs used. 1129 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1130 // .amdgpu_hsa_kernel or at EOF. 1131 class KernelScopeInfo { 1132 int SgprIndexUnusedMin = -1; 1133 int VgprIndexUnusedMin = -1; 1134 int AgprIndexUnusedMin = -1; 1135 MCContext *Ctx = nullptr; 1136 MCSubtargetInfo const *MSTI = nullptr; 1137 1138 void usesSgprAt(int i) { 1139 if (i >= SgprIndexUnusedMin) { 1140 SgprIndexUnusedMin = ++i; 1141 if (Ctx) { 1142 MCSymbol* const Sym = 1143 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1144 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1145 } 1146 } 1147 } 1148 1149 void usesVgprAt(int i) { 1150 if (i >= VgprIndexUnusedMin) { 1151 VgprIndexUnusedMin = ++i; 1152 if (Ctx) { 1153 MCSymbol* const Sym = 1154 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1155 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1156 VgprIndexUnusedMin); 1157 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1158 } 1159 } 1160 } 1161 1162 void usesAgprAt(int i) { 1163 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1164 if (!hasMAIInsts(*MSTI)) 1165 return; 1166 1167 if (i >= AgprIndexUnusedMin) { 1168 AgprIndexUnusedMin = ++i; 1169 if (Ctx) { 1170 MCSymbol* const Sym = 1171 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1172 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1173 1174 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1175 MCSymbol* const vSym = 1176 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1177 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1178 VgprIndexUnusedMin); 1179 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1180 } 1181 } 1182 } 1183 1184 public: 1185 KernelScopeInfo() = default; 1186 1187 void initialize(MCContext &Context) { 1188 Ctx = &Context; 1189 MSTI = Ctx->getSubtargetInfo(); 1190 1191 usesSgprAt(SgprIndexUnusedMin = -1); 1192 
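    // Seeding the trackers with -1 bumps each *IndexUnusedMin to 0 and thereby
    // defines the corresponding .kernel.*_count symbols with a value of 0, so
    // they exist even for kernels that never reference that register file.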
usesVgprAt(VgprIndexUnusedMin = -1); 1193 if (hasMAIInsts(*MSTI)) { 1194 usesAgprAt(AgprIndexUnusedMin = -1); 1195 } 1196 } 1197 1198 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1199 unsigned RegWidth) { 1200 switch (RegKind) { 1201 case IS_SGPR: 1202 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1203 break; 1204 case IS_AGPR: 1205 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1206 break; 1207 case IS_VGPR: 1208 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1209 break; 1210 default: 1211 break; 1212 } 1213 } 1214 }; 1215 1216 class AMDGPUAsmParser : public MCTargetAsmParser { 1217 MCAsmParser &Parser; 1218 1219 // Number of extra operands parsed after the first optional operand. 1220 // This may be necessary to skip hardcoded mandatory operands. 1221 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1222 1223 unsigned ForcedEncodingSize = 0; 1224 bool ForcedDPP = false; 1225 bool ForcedSDWA = false; 1226 KernelScopeInfo KernelScope; 1227 unsigned CPolSeen; 1228 1229 /// @name Auto-generated Match Functions 1230 /// { 1231 1232 #define GET_ASSEMBLER_HEADER 1233 #include "AMDGPUGenAsmMatcher.inc" 1234 1235 /// } 1236 1237 private: 1238 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1239 bool OutOfRangeError(SMRange Range); 1240 /// Calculate VGPR/SGPR blocks required for given target, reserved 1241 /// registers, and user-specified NextFreeXGPR values. 1242 /// 1243 /// \param Features [in] Target features, used for bug corrections. 1244 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1245 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1246 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1247 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1248 /// descriptor field, if valid. 1249 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1250 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1251 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1252 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1253 /// \param VGPRBlocks [out] Result VGPR block count. 1254 /// \param SGPRBlocks [out] Result SGPR block count. 1255 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1256 bool FlatScrUsed, bool XNACKUsed, 1257 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1258 SMRange VGPRRange, unsigned NextFreeSGPR, 1259 SMRange SGPRRange, unsigned &VGPRBlocks, 1260 unsigned &SGPRBlocks); 1261 bool ParseDirectiveAMDGCNTarget(); 1262 bool ParseDirectiveAMDHSAKernel(); 1263 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1264 bool ParseDirectiveHSACodeObjectVersion(); 1265 bool ParseDirectiveHSACodeObjectISA(); 1266 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1267 bool ParseDirectiveAMDKernelCodeT(); 1268 // TODO: Possibly make subtargetHasRegister const. 1269 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1270 bool ParseDirectiveAMDGPUHsaKernel(); 1271 1272 bool ParseDirectiveISAVersion(); 1273 bool ParseDirectiveHSAMetadata(); 1274 bool ParseDirectivePALMetadataBegin(); 1275 bool ParseDirectivePALMetadata(); 1276 bool ParseDirectiveAMDGPULDS(); 1277 1278 /// Common code to parse out a block of text (typically YAML) between start and 1279 /// end directives. 
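  /// For example, the YAML between .amdgpu_metadata and .end_amdgpu_metadata
  /// is collected this way before it is processed.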
1280 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1281 const char *AssemblerDirectiveEnd, 1282 std::string &CollectString); 1283 1284 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1285 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1286 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1287 unsigned &RegNum, unsigned &RegWidth, 1288 bool RestoreOnFailure = false); 1289 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1290 unsigned &RegNum, unsigned &RegWidth, 1291 SmallVectorImpl<AsmToken> &Tokens); 1292 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1293 unsigned &RegWidth, 1294 SmallVectorImpl<AsmToken> &Tokens); 1295 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1296 unsigned &RegWidth, 1297 SmallVectorImpl<AsmToken> &Tokens); 1298 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1299 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1300 bool ParseRegRange(unsigned& Num, unsigned& Width); 1301 unsigned getRegularReg(RegisterKind RegKind, 1302 unsigned RegNum, 1303 unsigned RegWidth, 1304 SMLoc Loc); 1305 1306 bool isRegister(); 1307 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1308 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1309 void initializeGprCountSymbol(RegisterKind RegKind); 1310 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1311 unsigned RegWidth); 1312 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1313 bool IsAtomic, bool IsLds = false); 1314 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1315 bool IsGdsHardcoded); 1316 1317 public: 1318 enum AMDGPUMatchResultTy { 1319 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1320 }; 1321 enum OperandMode { 1322 OperandMode_Default, 1323 OperandMode_NSA, 1324 }; 1325 1326 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1327 1328 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1329 const MCInstrInfo &MII, 1330 const MCTargetOptions &Options) 1331 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1332 MCAsmParserExtension::Initialize(Parser); 1333 1334 if (getFeatureBits().none()) { 1335 // Set default features. 1336 copySTI().ToggleFeature("southern-islands"); 1337 } 1338 1339 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1340 1341 { 1342 // TODO: make those pre-defined variables read-only. 1343 // Currently there is none suitable machinery in the core llvm-mc for this. 1344 // MCSymbol::isRedefinable is intended for another purpose, and 1345 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
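      // The block below publishes the target's ISA version as assembler
      // symbols. For example, for gfx90a it defines
      //   .amdgcn.gfx_generation_number = 9, ..._minor = 0, ..._stepping = 10
      // (or the .option.machine_version_* names otherwise).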
1346 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1347 MCContext &Ctx = getContext(); 1348 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1349 MCSymbol *Sym = 1350 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1351 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1352 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1353 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1354 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1355 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1356 } else { 1357 MCSymbol *Sym = 1358 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1359 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1360 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1361 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1362 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1363 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1364 } 1365 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1366 initializeGprCountSymbol(IS_VGPR); 1367 initializeGprCountSymbol(IS_SGPR); 1368 } else 1369 KernelScope.initialize(getContext()); 1370 } 1371 } 1372 1373 bool hasMIMG_R128() const { 1374 return AMDGPU::hasMIMG_R128(getSTI()); 1375 } 1376 1377 bool hasPackedD16() const { 1378 return AMDGPU::hasPackedD16(getSTI()); 1379 } 1380 1381 bool hasGFX10A16() const { 1382 return AMDGPU::hasGFX10A16(getSTI()); 1383 } 1384 1385 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1386 1387 bool isSI() const { 1388 return AMDGPU::isSI(getSTI()); 1389 } 1390 1391 bool isCI() const { 1392 return AMDGPU::isCI(getSTI()); 1393 } 1394 1395 bool isVI() const { 1396 return AMDGPU::isVI(getSTI()); 1397 } 1398 1399 bool isGFX9() const { 1400 return AMDGPU::isGFX9(getSTI()); 1401 } 1402 1403 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
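  // In other words, callers that must exclude gfx940 need an additional
  // !isGFX940() check alongside isGFX90A().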
1404 bool isGFX90A() const { 1405 return AMDGPU::isGFX90A(getSTI()); 1406 } 1407 1408 bool isGFX940() const { 1409 return AMDGPU::isGFX940(getSTI()); 1410 } 1411 1412 bool isGFX9Plus() const { 1413 return AMDGPU::isGFX9Plus(getSTI()); 1414 } 1415 1416 bool isGFX10() const { 1417 return AMDGPU::isGFX10(getSTI()); 1418 } 1419 1420 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1421 1422 bool isGFX11() const { 1423 return AMDGPU::isGFX11(getSTI()); 1424 } 1425 1426 bool isGFX11Plus() const { 1427 return AMDGPU::isGFX11Plus(getSTI()); 1428 } 1429 1430 bool isGFX10_BEncoding() const { 1431 return AMDGPU::isGFX10_BEncoding(getSTI()); 1432 } 1433 1434 bool hasInv2PiInlineImm() const { 1435 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1436 } 1437 1438 bool hasFlatOffsets() const { 1439 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1440 } 1441 1442 bool hasArchitectedFlatScratch() const { 1443 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1444 } 1445 1446 bool hasSGPR102_SGPR103() const { 1447 return !isVI() && !isGFX9(); 1448 } 1449 1450 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1451 1452 bool hasIntClamp() const { 1453 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1454 } 1455 1456 AMDGPUTargetStreamer &getTargetStreamer() { 1457 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1458 return static_cast<AMDGPUTargetStreamer &>(TS); 1459 } 1460 1461 const MCRegisterInfo *getMRI() const { 1462 // We need this const_cast because for some reason getContext() is not const 1463 // in MCAsmParser. 1464 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1465 } 1466 1467 const MCInstrInfo *getMII() const { 1468 return &MII; 1469 } 1470 1471 const FeatureBitset &getFeatureBits() const { 1472 return getSTI().getFeatureBits(); 1473 } 1474 1475 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1476 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1477 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1478 1479 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1480 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1481 bool isForcedDPP() const { return ForcedDPP; } 1482 bool isForcedSDWA() const { return ForcedSDWA; } 1483 ArrayRef<unsigned> getMatchedVariants() const; 1484 StringRef getMatchedVariantName() const; 1485 1486 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1487 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1488 bool RestoreOnFailure); 1489 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1490 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1491 SMLoc &EndLoc) override; 1492 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1493 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1494 unsigned Kind) override; 1495 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1496 OperandVector &Operands, MCStreamer &Out, 1497 uint64_t &ErrorInfo, 1498 bool MatchingInlineAsm) override; 1499 bool ParseDirective(AsmToken DirectiveID) override; 1500 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1501 OperandMode Mode = OperandMode_Default); 1502 StringRef parseMnemonicSuffix(StringRef Name); 1503 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1504 SMLoc NameLoc, OperandVector &Operands) override; 1505 //bool 
ProcessInstruction(MCInst &Inst); 1506 1507 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1508 1509 OperandMatchResultTy 1510 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1511 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1512 bool (*ConvertResult)(int64_t &) = nullptr); 1513 1514 OperandMatchResultTy 1515 parseOperandArrayWithPrefix(const char *Prefix, 1516 OperandVector &Operands, 1517 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1518 bool (*ConvertResult)(int64_t&) = nullptr); 1519 1520 OperandMatchResultTy 1521 parseNamedBit(StringRef Name, OperandVector &Operands, 1522 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1523 OperandMatchResultTy parseCPol(OperandVector &Operands); 1524 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1525 StringRef &Value, 1526 SMLoc &StringLoc); 1527 1528 bool isModifier(); 1529 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1530 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1531 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1532 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1533 bool parseSP3NegModifier(); 1534 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1535 OperandMatchResultTy parseReg(OperandVector &Operands); 1536 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1537 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1538 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1539 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1540 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1541 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1542 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1543 OperandMatchResultTy parseUfmt(int64_t &Format); 1544 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1545 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1546 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1547 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1548 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1549 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1550 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1551 1552 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1553 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1554 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1555 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1556 1557 bool parseCnt(int64_t &IntVal); 1558 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1559 1560 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1561 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1562 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1563 1564 bool parseDelay(int64_t &Delay); 1565 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1566 1567 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1568 1569 private: 1570 struct OperandInfoTy { 1571 SMLoc Loc; 1572 int64_t Id; 
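    // IsSymbolic records that the value was written by name (e.g.
    // hwreg(HW_REG_MODE) rather than hwreg(1)); IsDefined distinguishes
    // "not specified" from "explicitly specified as 0" during validation.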
1573 bool IsSymbolic = false; 1574 bool IsDefined = false; 1575 1576 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1577 }; 1578 1579 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1580 bool validateSendMsg(const OperandInfoTy &Msg, 1581 const OperandInfoTy &Op, 1582 const OperandInfoTy &Stream); 1583 1584 bool parseHwregBody(OperandInfoTy &HwReg, 1585 OperandInfoTy &Offset, 1586 OperandInfoTy &Width); 1587 bool validateHwreg(const OperandInfoTy &HwReg, 1588 const OperandInfoTy &Offset, 1589 const OperandInfoTy &Width); 1590 1591 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1592 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1593 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1594 1595 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1596 const OperandVector &Operands) const; 1597 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1598 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1599 SMLoc getLitLoc(const OperandVector &Operands) const; 1600 SMLoc getConstLoc(const OperandVector &Operands) const; 1601 1602 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1603 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1604 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1605 bool validateSOPLiteral(const MCInst &Inst) const; 1606 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1607 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1608 bool validateIntClampSupported(const MCInst &Inst); 1609 bool validateMIMGAtomicDMask(const MCInst &Inst); 1610 bool validateMIMGGatherDMask(const MCInst &Inst); 1611 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1612 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1613 bool validateMIMGAddrSize(const MCInst &Inst); 1614 bool validateMIMGD16(const MCInst &Inst); 1615 bool validateMIMGDim(const MCInst &Inst); 1616 bool validateMIMGMSAA(const MCInst &Inst); 1617 bool validateOpSel(const MCInst &Inst); 1618 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1619 bool validateVccOperand(unsigned Reg) const; 1620 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1621 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1622 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1623 bool validateAGPRLdSt(const MCInst &Inst) const; 1624 bool validateVGPRAlign(const MCInst &Inst) const; 1625 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1626 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1627 bool validateDivScale(const MCInst &Inst); 1628 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1629 const SMLoc &IDLoc); 1630 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1631 const SMLoc &IDLoc); 1632 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1633 unsigned getConstantBusLimit(unsigned Opcode) const; 1634 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1635 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1636 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1637 1638 bool isSupportedMnemo(StringRef Mnemo, 1639 const FeatureBitset &FBS); 1640 bool isSupportedMnemo(StringRef Mnemo, 1641 
const FeatureBitset &FBS, 1642 ArrayRef<unsigned> Variants); 1643 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1644 1645 bool isId(const StringRef Id) const; 1646 bool isId(const AsmToken &Token, const StringRef Id) const; 1647 bool isToken(const AsmToken::TokenKind Kind) const; 1648 bool trySkipId(const StringRef Id); 1649 bool trySkipId(const StringRef Pref, const StringRef Id); 1650 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1651 bool trySkipToken(const AsmToken::TokenKind Kind); 1652 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1653 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1654 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1655 1656 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1657 AsmToken::TokenKind getTokenKind() const; 1658 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1659 bool parseExpr(OperandVector &Operands); 1660 StringRef getTokenStr() const; 1661 AsmToken peekToken(); 1662 AsmToken getToken() const; 1663 SMLoc getLoc() const; 1664 void lex(); 1665 1666 public: 1667 void onBeginOfFile() override; 1668 1669 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1670 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1671 1672 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1673 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1674 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1675 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1676 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1677 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1678 1679 bool parseSwizzleOperand(int64_t &Op, 1680 const unsigned MinVal, 1681 const unsigned MaxVal, 1682 const StringRef ErrMsg, 1683 SMLoc &Loc); 1684 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1685 const unsigned MinVal, 1686 const unsigned MaxVal, 1687 const StringRef ErrMsg); 1688 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1689 bool parseSwizzleOffset(int64_t &Imm); 1690 bool parseSwizzleMacro(int64_t &Imm); 1691 bool parseSwizzleQuadPerm(int64_t &Imm); 1692 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1693 bool parseSwizzleBroadcast(int64_t &Imm); 1694 bool parseSwizzleSwap(int64_t &Imm); 1695 bool parseSwizzleReverse(int64_t &Imm); 1696 1697 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1698 int64_t parseGPRIdxMacro(); 1699 1700 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1701 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1702 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1703 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1704 1705 AMDGPUOperand::Ptr defaultCPol() const; 1706 1707 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1708 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1709 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1710 AMDGPUOperand::Ptr defaultFlatOffset() const; 1711 1712 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1713 1714 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1715 OptionalImmIndexMap &OptionalIdx); 1716 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1717 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1718 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1719 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1720 OptionalImmIndexMap &OptionalIdx); 1721 1722 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1723 1724 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1725 bool IsAtomic = false); 1726 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1727 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1728 1729 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1730 1731 bool parseDimId(unsigned &Encoding); 1732 OperandMatchResultTy parseDim(OperandVector &Operands); 1733 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1734 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1735 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1736 int64_t parseDPPCtrlSel(StringRef Ctrl); 1737 int64_t parseDPPCtrlPerm(); 1738 AMDGPUOperand::Ptr defaultRowMask() const; 1739 AMDGPUOperand::Ptr defaultBankMask() const; 1740 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1741 AMDGPUOperand::Ptr defaultFI() const; 1742 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1743 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1744 1745 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1746 AMDGPUOperand::ImmTy Type); 1747 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1748 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1749 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1750 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1751 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1752 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1753 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1754 uint64_t BasicInstType, 1755 bool SkipDstVcc = false, 1756 bool SkipSrcVcc = false); 1757 1758 AMDGPUOperand::Ptr defaultBLGP() const; 1759 AMDGPUOperand::Ptr defaultCBSZ() const; 1760 AMDGPUOperand::Ptr defaultABID() const; 1761 1762 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1763 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1764 1765 OperandMatchResultTy parseWaitVDST(OperandVector &Operands); 1766 AMDGPUOperand::Ptr defaultWaitVDST() const; 1767 }; 1768 1769 struct OptionalOperand { 1770 const char *Name; 1771 AMDGPUOperand::ImmTy Type; 1772 bool IsBit; 1773 bool (*ConvertResult)(int64_t&); 1774 }; 1775 1776 } // end anonymous namespace 1777 1778 // May be called with integer type with equivalent bitwidth. 
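// For example, getFltSemantics(4) returns APFloat::IEEEsingle() and is used
// for both f32 and i32 operands; only the byte size selects the semantics.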
1779 static const fltSemantics *getFltSemantics(unsigned Size) { 1780 switch (Size) { 1781 case 4: 1782 return &APFloat::IEEEsingle(); 1783 case 8: 1784 return &APFloat::IEEEdouble(); 1785 case 2: 1786 return &APFloat::IEEEhalf(); 1787 default: 1788 llvm_unreachable("unsupported fp type"); 1789 } 1790 } 1791 1792 static const fltSemantics *getFltSemantics(MVT VT) { 1793 return getFltSemantics(VT.getSizeInBits() / 8); 1794 } 1795 1796 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1797 switch (OperandType) { 1798 case AMDGPU::OPERAND_REG_IMM_INT32: 1799 case AMDGPU::OPERAND_REG_IMM_FP32: 1800 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1801 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1802 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1803 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1805 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1806 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1807 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1809 case AMDGPU::OPERAND_KIMM32: 1810 return &APFloat::IEEEsingle(); 1811 case AMDGPU::OPERAND_REG_IMM_INT64: 1812 case AMDGPU::OPERAND_REG_IMM_FP64: 1813 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1814 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1815 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1816 return &APFloat::IEEEdouble(); 1817 case AMDGPU::OPERAND_REG_IMM_INT16: 1818 case AMDGPU::OPERAND_REG_IMM_FP16: 1819 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1820 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1822 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1823 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1824 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1825 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1826 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1827 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1828 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1829 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1830 case AMDGPU::OPERAND_KIMM16: 1831 return &APFloat::IEEEhalf(); 1832 default: 1833 llvm_unreachable("unsupported fp type"); 1834 } 1835 } 1836 1837 //===----------------------------------------------------------------------===// 1838 // Operand 1839 //===----------------------------------------------------------------------===// 1840 1841 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1842 bool Lost; 1843 1844 // Convert literal to single precision 1845 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1846 APFloat::rmNearestTiesToEven, 1847 &Lost); 1848 // We allow precision lost but not overflow or underflow 1849 if (Status != APFloat::opOK && 1850 Lost && 1851 ((Status & APFloat::opOverflow) != 0 || 1852 (Status & APFloat::opUnderflow) != 0)) { 1853 return false; 1854 } 1855 1856 return true; 1857 } 1858 1859 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1860 return isUIntN(Size, Val) || isIntN(Size, Val); 1861 } 1862 1863 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1864 if (VT.getScalarType() == MVT::i16) { 1865 // FP immediate values are broken. 1866 return isInlinableIntLiteral(Val); 1867 } 1868 1869 // f16/v2f16 operands work correctly for all values. 1870 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1871 } 1872 1873 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1874 1875 // This is a hack to enable named inline values like 1876 // shared_base with both 32-bit and 64-bit operands. 1877 // Note that these values are defined as 1878 // 32-bit operands only. 
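// For example, a named value such as src_shared_base is accepted here
// whether a 32-bit or a 64-bit operand is expected.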
1879 if (isInlineValue()) {
1880 return true;
1881 }
1882
1883 if (!isImmTy(ImmTyNone)) {
1884 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1885 return false;
1886 }
1887 // TODO: We should avoid using host float here. It would be better to
1888 // check the float bit values which is what a few other places do.
1889 // We've had bot failures before due to weird NaN support on mips hosts.
1890
1891 APInt Literal(64, Imm.Val);
1892
1893 if (Imm.IsFPImm) { // We got fp literal token
1894 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1895 return AMDGPU::isInlinableLiteral64(Imm.Val,
1896 AsmParser->hasInv2PiInlineImm());
1897 }
1898
1899 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1900 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1901 return false;
1902
1903 if (type.getScalarSizeInBits() == 16) {
1904 return isInlineableLiteralOp16(
1905 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1906 type, AsmParser->hasInv2PiInlineImm());
1907 }
1908
1909 // Check if single precision literal is inlinable
1910 return AMDGPU::isInlinableLiteral32(
1911 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1912 AsmParser->hasInv2PiInlineImm());
1913 }
1914
1915 // We got int literal token.
1916 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1917 return AMDGPU::isInlinableLiteral64(Imm.Val,
1918 AsmParser->hasInv2PiInlineImm());
1919 }
1920
1921 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1922 return false;
1923 }
1924
1925 if (type.getScalarSizeInBits() == 16) {
1926 return isInlineableLiteralOp16(
1927 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1928 type, AsmParser->hasInv2PiInlineImm());
1929 }
1930
1931 return AMDGPU::isInlinableLiteral32(
1932 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1933 AsmParser->hasInv2PiInlineImm());
1934 }
1935
1936 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1937 // Check that this immediate can be added as literal
1938 if (!isImmTy(ImmTyNone)) {
1939 return false;
1940 }
1941
1942 if (!Imm.IsFPImm) {
1943 // We got int literal token.
1944
1945 if (type == MVT::f64 && hasFPModifiers()) {
1946 // Cannot apply fp modifiers to int literals preserving the same semantics
1947 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1948 // disable these cases.
1949 return false;
1950 }
1951
1952 unsigned Size = type.getSizeInBits();
1953 if (Size == 64)
1954 Size = 32;
1955
1956 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1957 // types.
1958 return isSafeTruncation(Imm.Val, Size);
1959 }
1960
1961 // We got fp literal token
1962 if (type == MVT::f64) { // Expected 64-bit fp operand
1963 // We would set the low 32 bits of the literal to zeroes, but we accept these literals.
1964 return true;
1965 }
1966
1967 if (type == MVT::i64) { // Expected 64-bit int operand
1968 // We don't allow fp literals in 64-bit integer instructions. It is
1969 // unclear how we should encode them.
1970 return false;
1971 }
1972
1973 // We allow fp literals with f16x2 operands assuming that the specified
1974 // literal goes into the lower half and the upper half is zero. We also
1975 // require that the literal may be losslessly converted to f16.
1976 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1977 (type == MVT::v2i16)? MVT::i16 :
1978 (type == MVT::v2f32)?
MVT::f32 : type; 1979 1980 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1981 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1982 } 1983 1984 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1985 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1986 } 1987 1988 bool AMDGPUOperand::isVRegWithInputMods() const { 1989 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1990 // GFX90A allows DPP on 64-bit operands. 1991 (isRegClass(AMDGPU::VReg_64RegClassID) && 1992 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1993 } 1994 1995 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1996 if (AsmParser->isVI()) 1997 return isVReg32(); 1998 else if (AsmParser->isGFX9Plus()) 1999 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 2000 else 2001 return false; 2002 } 2003 2004 bool AMDGPUOperand::isSDWAFP16Operand() const { 2005 return isSDWAOperand(MVT::f16); 2006 } 2007 2008 bool AMDGPUOperand::isSDWAFP32Operand() const { 2009 return isSDWAOperand(MVT::f32); 2010 } 2011 2012 bool AMDGPUOperand::isSDWAInt16Operand() const { 2013 return isSDWAOperand(MVT::i16); 2014 } 2015 2016 bool AMDGPUOperand::isSDWAInt32Operand() const { 2017 return isSDWAOperand(MVT::i32); 2018 } 2019 2020 bool AMDGPUOperand::isBoolReg() const { 2021 auto FB = AsmParser->getFeatureBits(); 2022 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2023 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2024 } 2025 2026 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2027 { 2028 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2029 assert(Size == 2 || Size == 4 || Size == 8); 2030 2031 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2032 2033 if (Imm.Mods.Abs) { 2034 Val &= ~FpSignMask; 2035 } 2036 if (Imm.Mods.Neg) { 2037 Val ^= FpSignMask; 2038 } 2039 2040 return Val; 2041 } 2042 2043 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2044 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2045 Inst.getNumOperands())) { 2046 addLiteralImmOperand(Inst, Imm.Val, 2047 ApplyModifiers & 2048 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2049 } else { 2050 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2051 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2052 setImmKindNone(); 2053 } 2054 } 2055 2056 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2057 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2058 auto OpNum = Inst.getNumOperands(); 2059 // Check that this operand accepts literals 2060 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2061 2062 if (ApplyModifiers) { 2063 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2064 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2065 Val = applyInputFPModifiers(Val, Size); 2066 } 2067 2068 APInt Literal(64, Val); 2069 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2070 2071 if (Imm.IsFPImm) { // We got fp literal token 2072 switch (OpTy) { 2073 case AMDGPU::OPERAND_REG_IMM_INT64: 2074 case AMDGPU::OPERAND_REG_IMM_FP64: 2075 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2076 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2077 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2078 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2079 AsmParser->hasInv2PiInlineImm())) { 2080 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2081 setImmKindConst(); 2082 return; 2083 } 2084 2085 // Non-inlineable 2086 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2087 // For fp operands we check if low 32 bits are zeros 2088 if (Literal.getLoBits(32) != 0) { 2089 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2090 "Can't encode literal as exact 64-bit floating-point operand. " 2091 "Low 32-bits will be set to zero"); 2092 } 2093 2094 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2095 setImmKindLiteral(); 2096 return; 2097 } 2098 2099 // We don't allow fp literals in 64-bit integer instructions. It is 2100 // unclear how we should encode them. This case should be checked earlier 2101 // in predicate methods (isLiteralImm()) 2102 llvm_unreachable("fp literal in 64-bit integer instruction."); 2103 2104 case AMDGPU::OPERAND_REG_IMM_INT32: 2105 case AMDGPU::OPERAND_REG_IMM_FP32: 2106 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2107 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2108 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2109 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2110 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2111 case AMDGPU::OPERAND_REG_IMM_INT16: 2112 case AMDGPU::OPERAND_REG_IMM_FP16: 2113 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2114 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2116 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2117 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2118 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2119 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2120 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2122 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2123 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2124 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2125 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2126 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2127 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2128 case AMDGPU::OPERAND_KIMM32: 2129 case AMDGPU::OPERAND_KIMM16: { 2130 bool lost; 2131 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2132 // Convert literal to single precision 2133 FPLiteral.convert(*getOpFltSemantics(OpTy), 2134 APFloat::rmNearestTiesToEven, &lost); 2135 // We allow precision lost but not overflow or underflow. This should be 2136 // checked earlier in isLiteralImm() 2137 2138 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2139 Inst.addOperand(MCOperand::createImm(ImmVal)); 2140 setImmKindLiteral(); 2141 return; 2142 } 2143 default: 2144 llvm_unreachable("invalid operand size"); 2145 } 2146 2147 return; 2148 } 2149 2150 // We got int literal token. 2151 // Only sign extend inline immediates. 
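// Values in the inline range are emitted unchanged below; anything else is
// truncated to the operand's literal width (32 or 16 bits).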
2152 switch (OpTy) { 2153 case AMDGPU::OPERAND_REG_IMM_INT32: 2154 case AMDGPU::OPERAND_REG_IMM_FP32: 2155 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2156 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2157 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2158 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2159 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2160 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2161 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2162 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2163 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2164 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2165 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2166 if (isSafeTruncation(Val, 32) && 2167 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2168 AsmParser->hasInv2PiInlineImm())) { 2169 Inst.addOperand(MCOperand::createImm(Val)); 2170 setImmKindConst(); 2171 return; 2172 } 2173 2174 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2175 setImmKindLiteral(); 2176 return; 2177 2178 case AMDGPU::OPERAND_REG_IMM_INT64: 2179 case AMDGPU::OPERAND_REG_IMM_FP64: 2180 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2181 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2182 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2183 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2184 Inst.addOperand(MCOperand::createImm(Val)); 2185 setImmKindConst(); 2186 return; 2187 } 2188 2189 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2190 setImmKindLiteral(); 2191 return; 2192 2193 case AMDGPU::OPERAND_REG_IMM_INT16: 2194 case AMDGPU::OPERAND_REG_IMM_FP16: 2195 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2196 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2197 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2198 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2199 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2200 if (isSafeTruncation(Val, 16) && 2201 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2202 AsmParser->hasInv2PiInlineImm())) { 2203 Inst.addOperand(MCOperand::createImm(Val)); 2204 setImmKindConst(); 2205 return; 2206 } 2207 2208 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2209 setImmKindLiteral(); 2210 return; 2211 2212 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2213 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2214 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2215 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2216 assert(isSafeTruncation(Val, 16)); 2217 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2218 AsmParser->hasInv2PiInlineImm())); 2219 2220 Inst.addOperand(MCOperand::createImm(Val)); 2221 return; 2222 } 2223 case AMDGPU::OPERAND_KIMM32: 2224 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2225 setImmKindNone(); 2226 return; 2227 case AMDGPU::OPERAND_KIMM16: 2228 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2229 setImmKindNone(); 2230 return; 2231 default: 2232 llvm_unreachable("invalid operand size"); 2233 } 2234 } 2235 2236 template <unsigned Bitwidth> 2237 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2238 APInt Literal(64, Imm.Val); 2239 setImmKindNone(); 2240 2241 if (!Imm.IsFPImm) { 2242 // We got int literal token. 
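// Integer literals are simply truncated to the KIMM operand width.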
2243 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2244 return; 2245 } 2246 2247 bool Lost; 2248 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2249 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2250 APFloat::rmNearestTiesToEven, &Lost); 2251 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2252 } 2253 2254 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2255 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2256 } 2257 2258 static bool isInlineValue(unsigned Reg) { 2259 switch (Reg) { 2260 case AMDGPU::SRC_SHARED_BASE: 2261 case AMDGPU::SRC_SHARED_LIMIT: 2262 case AMDGPU::SRC_PRIVATE_BASE: 2263 case AMDGPU::SRC_PRIVATE_LIMIT: 2264 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2265 return true; 2266 case AMDGPU::SRC_VCCZ: 2267 case AMDGPU::SRC_EXECZ: 2268 case AMDGPU::SRC_SCC: 2269 return true; 2270 case AMDGPU::SGPR_NULL: 2271 return true; 2272 default: 2273 return false; 2274 } 2275 } 2276 2277 bool AMDGPUOperand::isInlineValue() const { 2278 return isRegKind() && ::isInlineValue(getReg()); 2279 } 2280 2281 //===----------------------------------------------------------------------===// 2282 // AsmParser 2283 //===----------------------------------------------------------------------===// 2284 2285 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2286 if (Is == IS_VGPR) { 2287 switch (RegWidth) { 2288 default: return -1; 2289 case 32: 2290 return AMDGPU::VGPR_32RegClassID; 2291 case 64: 2292 return AMDGPU::VReg_64RegClassID; 2293 case 96: 2294 return AMDGPU::VReg_96RegClassID; 2295 case 128: 2296 return AMDGPU::VReg_128RegClassID; 2297 case 160: 2298 return AMDGPU::VReg_160RegClassID; 2299 case 192: 2300 return AMDGPU::VReg_192RegClassID; 2301 case 224: 2302 return AMDGPU::VReg_224RegClassID; 2303 case 256: 2304 return AMDGPU::VReg_256RegClassID; 2305 case 512: 2306 return AMDGPU::VReg_512RegClassID; 2307 case 1024: 2308 return AMDGPU::VReg_1024RegClassID; 2309 } 2310 } else if (Is == IS_TTMP) { 2311 switch (RegWidth) { 2312 default: return -1; 2313 case 32: 2314 return AMDGPU::TTMP_32RegClassID; 2315 case 64: 2316 return AMDGPU::TTMP_64RegClassID; 2317 case 128: 2318 return AMDGPU::TTMP_128RegClassID; 2319 case 256: 2320 return AMDGPU::TTMP_256RegClassID; 2321 case 512: 2322 return AMDGPU::TTMP_512RegClassID; 2323 } 2324 } else if (Is == IS_SGPR) { 2325 switch (RegWidth) { 2326 default: return -1; 2327 case 32: 2328 return AMDGPU::SGPR_32RegClassID; 2329 case 64: 2330 return AMDGPU::SGPR_64RegClassID; 2331 case 96: 2332 return AMDGPU::SGPR_96RegClassID; 2333 case 128: 2334 return AMDGPU::SGPR_128RegClassID; 2335 case 160: 2336 return AMDGPU::SGPR_160RegClassID; 2337 case 192: 2338 return AMDGPU::SGPR_192RegClassID; 2339 case 224: 2340 return AMDGPU::SGPR_224RegClassID; 2341 case 256: 2342 return AMDGPU::SGPR_256RegClassID; 2343 case 512: 2344 return AMDGPU::SGPR_512RegClassID; 2345 } 2346 } else if (Is == IS_AGPR) { 2347 switch (RegWidth) { 2348 default: return -1; 2349 case 32: 2350 return AMDGPU::AGPR_32RegClassID; 2351 case 64: 2352 return AMDGPU::AReg_64RegClassID; 2353 case 96: 2354 return AMDGPU::AReg_96RegClassID; 2355 case 128: 2356 return AMDGPU::AReg_128RegClassID; 2357 case 160: 2358 return AMDGPU::AReg_160RegClassID; 2359 case 192: 2360 return AMDGPU::AReg_192RegClassID; 2361 case 224: 2362 return AMDGPU::AReg_224RegClassID; 2363 case 256: 2364 return AMDGPU::AReg_256RegClassID; 2365 case 512: 2366 return AMDGPU::AReg_512RegClassID; 
2367 case 1024: 2368 return AMDGPU::AReg_1024RegClassID; 2369 } 2370 } 2371 return -1; 2372 } 2373 2374 static unsigned getSpecialRegForName(StringRef RegName) { 2375 return StringSwitch<unsigned>(RegName) 2376 .Case("exec", AMDGPU::EXEC) 2377 .Case("vcc", AMDGPU::VCC) 2378 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2379 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2380 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2381 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2382 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2383 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2384 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2385 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2386 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2387 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2388 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2389 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2390 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2391 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2392 .Case("m0", AMDGPU::M0) 2393 .Case("vccz", AMDGPU::SRC_VCCZ) 2394 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2395 .Case("execz", AMDGPU::SRC_EXECZ) 2396 .Case("src_execz", AMDGPU::SRC_EXECZ) 2397 .Case("scc", AMDGPU::SRC_SCC) 2398 .Case("src_scc", AMDGPU::SRC_SCC) 2399 .Case("tba", AMDGPU::TBA) 2400 .Case("tma", AMDGPU::TMA) 2401 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2402 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2403 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2404 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2405 .Case("vcc_lo", AMDGPU::VCC_LO) 2406 .Case("vcc_hi", AMDGPU::VCC_HI) 2407 .Case("exec_lo", AMDGPU::EXEC_LO) 2408 .Case("exec_hi", AMDGPU::EXEC_HI) 2409 .Case("tma_lo", AMDGPU::TMA_LO) 2410 .Case("tma_hi", AMDGPU::TMA_HI) 2411 .Case("tba_lo", AMDGPU::TBA_LO) 2412 .Case("tba_hi", AMDGPU::TBA_HI) 2413 .Case("pc", AMDGPU::PC_REG) 2414 .Case("null", AMDGPU::SGPR_NULL) 2415 .Default(AMDGPU::NoRegister); 2416 } 2417 2418 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2419 SMLoc &EndLoc, bool RestoreOnFailure) { 2420 auto R = parseRegister(); 2421 if (!R) return true; 2422 assert(R->isReg()); 2423 RegNo = R->getReg(); 2424 StartLoc = R->getStartLoc(); 2425 EndLoc = R->getEndLoc(); 2426 return false; 2427 } 2428 2429 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2430 SMLoc &EndLoc) { 2431 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2432 } 2433 2434 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2435 SMLoc &StartLoc, 2436 SMLoc &EndLoc) { 2437 bool Result = 2438 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2439 bool PendingErrors = getParser().hasPendingError(); 2440 getParser().clearPendingErrors(); 2441 if (PendingErrors) 2442 return MatchOperand_ParseFail; 2443 if (Result) 2444 return MatchOperand_NoMatch; 2445 return MatchOperand_Success; 2446 } 2447 2448 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2449 RegisterKind RegKind, unsigned Reg1, 2450 SMLoc Loc) { 2451 switch (RegKind) { 2452 case IS_SPECIAL: 2453 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2454 Reg = AMDGPU::EXEC; 2455 RegWidth = 64; 2456 return true; 2457 } 2458 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2459 Reg = AMDGPU::FLAT_SCR; 2460 RegWidth = 64; 2461 return true; 2462 } 2463 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2464 Reg = AMDGPU::XNACK_MASK; 2465 RegWidth = 64; 
2466 return true; 2467 } 2468 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2469 Reg = AMDGPU::VCC; 2470 RegWidth = 64; 2471 return true; 2472 } 2473 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2474 Reg = AMDGPU::TBA; 2475 RegWidth = 64; 2476 return true; 2477 } 2478 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2479 Reg = AMDGPU::TMA; 2480 RegWidth = 64; 2481 return true; 2482 } 2483 Error(Loc, "register does not fit in the list"); 2484 return false; 2485 case IS_VGPR: 2486 case IS_SGPR: 2487 case IS_AGPR: 2488 case IS_TTMP: 2489 if (Reg1 != Reg + RegWidth / 32) { 2490 Error(Loc, "registers in a list must have consecutive indices"); 2491 return false; 2492 } 2493 RegWidth += 32; 2494 return true; 2495 default: 2496 llvm_unreachable("unexpected register kind"); 2497 } 2498 } 2499 2500 struct RegInfo { 2501 StringLiteral Name; 2502 RegisterKind Kind; 2503 }; 2504 2505 static constexpr RegInfo RegularRegisters[] = { 2506 {{"v"}, IS_VGPR}, 2507 {{"s"}, IS_SGPR}, 2508 {{"ttmp"}, IS_TTMP}, 2509 {{"acc"}, IS_AGPR}, 2510 {{"a"}, IS_AGPR}, 2511 }; 2512 2513 static bool isRegularReg(RegisterKind Kind) { 2514 return Kind == IS_VGPR || 2515 Kind == IS_SGPR || 2516 Kind == IS_TTMP || 2517 Kind == IS_AGPR; 2518 } 2519 2520 static const RegInfo* getRegularRegInfo(StringRef Str) { 2521 for (const RegInfo &Reg : RegularRegisters) 2522 if (Str.startswith(Reg.Name)) 2523 return &Reg; 2524 return nullptr; 2525 } 2526 2527 static bool getRegNum(StringRef Str, unsigned& Num) { 2528 return !Str.getAsInteger(10, Num); 2529 } 2530 2531 bool 2532 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2533 const AsmToken &NextToken) const { 2534 2535 // A list of consecutive registers: [s0,s1,s2,s3] 2536 if (Token.is(AsmToken::LBrac)) 2537 return true; 2538 2539 if (!Token.is(AsmToken::Identifier)) 2540 return false; 2541 2542 // A single register like s0 or a range of registers like s[0:1] 2543 2544 StringRef Str = Token.getString(); 2545 const RegInfo *Reg = getRegularRegInfo(Str); 2546 if (Reg) { 2547 StringRef RegName = Reg->Name; 2548 StringRef RegSuffix = Str.substr(RegName.size()); 2549 if (!RegSuffix.empty()) { 2550 unsigned Num; 2551 // A single register with an index: rXX 2552 if (getRegNum(RegSuffix, Num)) 2553 return true; 2554 } else { 2555 // A range of registers: r[XX:YY]. 2556 if (NextToken.is(AsmToken::LBrac)) 2557 return true; 2558 } 2559 } 2560 2561 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2562 } 2563 2564 bool 2565 AMDGPUAsmParser::isRegister() 2566 { 2567 return isRegister(getToken(), peekToken()); 2568 } 2569 2570 unsigned 2571 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2572 unsigned RegNum, 2573 unsigned RegWidth, 2574 SMLoc Loc) { 2575 2576 assert(isRegularReg(RegKind)); 2577 2578 unsigned AlignSize = 1; 2579 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2580 // SGPR and TTMP registers must be aligned. 2581 // Max required alignment is 4 dwords. 
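// For example, a 128-bit tuple such as s[4:7] must start at an index that
// is a multiple of 4, so s[2:5] is rejected below as misaligned.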
2582 AlignSize = std::min(RegWidth / 32, 4u); 2583 } 2584 2585 if (RegNum % AlignSize != 0) { 2586 Error(Loc, "invalid register alignment"); 2587 return AMDGPU::NoRegister; 2588 } 2589 2590 unsigned RegIdx = RegNum / AlignSize; 2591 int RCID = getRegClass(RegKind, RegWidth); 2592 if (RCID == -1) { 2593 Error(Loc, "invalid or unsupported register size"); 2594 return AMDGPU::NoRegister; 2595 } 2596 2597 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2598 const MCRegisterClass RC = TRI->getRegClass(RCID); 2599 if (RegIdx >= RC.getNumRegs()) { 2600 Error(Loc, "register index is out of range"); 2601 return AMDGPU::NoRegister; 2602 } 2603 2604 return RC.getRegister(RegIdx); 2605 } 2606 2607 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2608 int64_t RegLo, RegHi; 2609 if (!skipToken(AsmToken::LBrac, "missing register index")) 2610 return false; 2611 2612 SMLoc FirstIdxLoc = getLoc(); 2613 SMLoc SecondIdxLoc; 2614 2615 if (!parseExpr(RegLo)) 2616 return false; 2617 2618 if (trySkipToken(AsmToken::Colon)) { 2619 SecondIdxLoc = getLoc(); 2620 if (!parseExpr(RegHi)) 2621 return false; 2622 } else { 2623 RegHi = RegLo; 2624 } 2625 2626 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2627 return false; 2628 2629 if (!isUInt<32>(RegLo)) { 2630 Error(FirstIdxLoc, "invalid register index"); 2631 return false; 2632 } 2633 2634 if (!isUInt<32>(RegHi)) { 2635 Error(SecondIdxLoc, "invalid register index"); 2636 return false; 2637 } 2638 2639 if (RegLo > RegHi) { 2640 Error(FirstIdxLoc, "first register index should not exceed second index"); 2641 return false; 2642 } 2643 2644 Num = static_cast<unsigned>(RegLo); 2645 RegWidth = 32 * ((RegHi - RegLo) + 1); 2646 return true; 2647 } 2648 2649 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2650 unsigned &RegNum, unsigned &RegWidth, 2651 SmallVectorImpl<AsmToken> &Tokens) { 2652 assert(isToken(AsmToken::Identifier)); 2653 unsigned Reg = getSpecialRegForName(getTokenStr()); 2654 if (Reg) { 2655 RegNum = 0; 2656 RegWidth = 32; 2657 RegKind = IS_SPECIAL; 2658 Tokens.push_back(getToken()); 2659 lex(); // skip register name 2660 } 2661 return Reg; 2662 } 2663 2664 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2665 unsigned &RegNum, unsigned &RegWidth, 2666 SmallVectorImpl<AsmToken> &Tokens) { 2667 assert(isToken(AsmToken::Identifier)); 2668 StringRef RegName = getTokenStr(); 2669 auto Loc = getLoc(); 2670 2671 const RegInfo *RI = getRegularRegInfo(RegName); 2672 if (!RI) { 2673 Error(Loc, "invalid register name"); 2674 return AMDGPU::NoRegister; 2675 } 2676 2677 Tokens.push_back(getToken()); 2678 lex(); // skip register name 2679 2680 RegKind = RI->Kind; 2681 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2682 if (!RegSuffix.empty()) { 2683 // Single 32-bit register: vXX. 2684 if (!getRegNum(RegSuffix, RegNum)) { 2685 Error(Loc, "invalid register index"); 2686 return AMDGPU::NoRegister; 2687 } 2688 RegWidth = 32; 2689 } else { 2690 // Range of registers: v[XX:YY]. ":YY" is optional. 
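// For example, v[8:11] yields RegNum = 8 and RegWidth = 128, while v[8]
// denotes a single 32-bit register.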
2691 if (!ParseRegRange(RegNum, RegWidth)) 2692 return AMDGPU::NoRegister; 2693 } 2694 2695 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2696 } 2697 2698 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2699 unsigned &RegWidth, 2700 SmallVectorImpl<AsmToken> &Tokens) { 2701 unsigned Reg = AMDGPU::NoRegister; 2702 auto ListLoc = getLoc(); 2703 2704 if (!skipToken(AsmToken::LBrac, 2705 "expected a register or a list of registers")) { 2706 return AMDGPU::NoRegister; 2707 } 2708 2709 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2710 2711 auto Loc = getLoc(); 2712 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2713 return AMDGPU::NoRegister; 2714 if (RegWidth != 32) { 2715 Error(Loc, "expected a single 32-bit register"); 2716 return AMDGPU::NoRegister; 2717 } 2718 2719 for (; trySkipToken(AsmToken::Comma); ) { 2720 RegisterKind NextRegKind; 2721 unsigned NextReg, NextRegNum, NextRegWidth; 2722 Loc = getLoc(); 2723 2724 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2725 NextRegNum, NextRegWidth, 2726 Tokens)) { 2727 return AMDGPU::NoRegister; 2728 } 2729 if (NextRegWidth != 32) { 2730 Error(Loc, "expected a single 32-bit register"); 2731 return AMDGPU::NoRegister; 2732 } 2733 if (NextRegKind != RegKind) { 2734 Error(Loc, "registers in a list must be of the same kind"); 2735 return AMDGPU::NoRegister; 2736 } 2737 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2738 return AMDGPU::NoRegister; 2739 } 2740 2741 if (!skipToken(AsmToken::RBrac, 2742 "expected a comma or a closing square bracket")) { 2743 return AMDGPU::NoRegister; 2744 } 2745 2746 if (isRegularReg(RegKind)) 2747 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2748 2749 return Reg; 2750 } 2751 2752 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2753 unsigned &RegNum, unsigned &RegWidth, 2754 SmallVectorImpl<AsmToken> &Tokens) { 2755 auto Loc = getLoc(); 2756 Reg = AMDGPU::NoRegister; 2757 2758 if (isToken(AsmToken::Identifier)) { 2759 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2760 if (Reg == AMDGPU::NoRegister) 2761 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2762 } else { 2763 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2764 } 2765 2766 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2767 if (Reg == AMDGPU::NoRegister) { 2768 assert(Parser.hasPendingError()); 2769 return false; 2770 } 2771 2772 if (!subtargetHasRegister(*TRI, Reg)) { 2773 if (Reg == AMDGPU::SGPR_NULL) { 2774 Error(Loc, "'null' operand is not supported on this GPU"); 2775 } else { 2776 Error(Loc, "register not available on this GPU"); 2777 } 2778 return false; 2779 } 2780 2781 return true; 2782 } 2783 2784 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2785 unsigned &RegNum, unsigned &RegWidth, 2786 bool RestoreOnFailure /*=false*/) { 2787 Reg = AMDGPU::NoRegister; 2788 2789 SmallVector<AsmToken, 1> Tokens; 2790 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2791 if (RestoreOnFailure) { 2792 while (!Tokens.empty()) { 2793 getLexer().UnLex(Tokens.pop_back_val()); 2794 } 2795 } 2796 return true; 2797 } 2798 return false; 2799 } 2800 2801 Optional<StringRef> 2802 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2803 switch (RegKind) { 2804 case IS_VGPR: 2805 return StringRef(".amdgcn.next_free_vgpr"); 2806 case IS_SGPR: 2807 return StringRef(".amdgcn.next_free_sgpr"); 2808 default: 2809 return None; 2810 } 2811 } 2812 2813 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2814 auto SymbolName = getGprCountSymbolName(RegKind); 2815 assert(SymbolName && "initializing invalid register kind"); 2816 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2817 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2818 } 2819 2820 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2821 unsigned DwordRegIndex, 2822 unsigned RegWidth) { 2823 // Symbols are only defined for GCN targets 2824 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2825 return true; 2826 2827 auto SymbolName = getGprCountSymbolName(RegKind); 2828 if (!SymbolName) 2829 return true; 2830 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2831 2832 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2833 int64_t OldCount; 2834 2835 if (!Sym->isVariable()) 2836 return !Error(getLoc(), 2837 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2838 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2839 return !Error( 2840 getLoc(), 2841 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2842 2843 if (OldCount <= NewMax) 2844 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2845 2846 return true; 2847 } 2848 2849 std::unique_ptr<AMDGPUOperand> 2850 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2851 const auto &Tok = getToken(); 2852 SMLoc StartLoc = Tok.getLoc(); 2853 SMLoc EndLoc = Tok.getEndLoc(); 2854 RegisterKind RegKind; 2855 unsigned Reg, RegNum, RegWidth; 2856 2857 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2858 return nullptr; 2859 } 2860 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2861 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2862 return nullptr; 2863 } else 2864 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2865 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2866 } 2867 2868 OperandMatchResultTy 2869 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2870 // TODO: add syntactic sugar for 1/(2*PI) 2871 2872 assert(!isRegister()); 2873 assert(!isModifier()); 2874 2875 const auto& Tok = getToken(); 2876 const auto& NextTok = peekToken(); 2877 bool IsReal = Tok.is(AsmToken::Real); 2878 SMLoc S = getLoc(); 2879 bool Negate = false; 2880 2881 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2882 lex(); 2883 IsReal = true; 2884 Negate = true; 2885 } 2886 2887 if (IsReal) { 2888 // Floating-point expressions are not supported. 2889 // Can only allow floating-point literals with an 2890 // optional sign. 2891 2892 StringRef Num = getTokenStr(); 2893 lex(); 2894 2895 APFloat RealVal(APFloat::IEEEdouble()); 2896 auto roundMode = APFloat::rmNearestTiesToEven; 2897 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2898 return MatchOperand_ParseFail; 2899 } 2900 if (Negate) 2901 RealVal.changeSign(); 2902 2903 Operands.push_back( 2904 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2905 AMDGPUOperand::ImmTyNone, true)); 2906 2907 return MatchOperand_Success; 2908 2909 } else { 2910 int64_t IntVal; 2911 const MCExpr *Expr; 2912 SMLoc S = getLoc(); 2913 2914 if (HasSP3AbsModifier) { 2915 // This is a workaround for handling expressions 2916 // as arguments of SP3 'abs' modifier, for example: 2917 // |1.0| 2918 // |-1| 2919 // |1+x| 2920 // This syntax is not compatible with syntax of standard 2921 // MC expressions (due to the trailing '|'). 
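// Using parsePrimaryExpr() below keeps the trailing '|' out of the parsed
// expression; the closing '|' is consumed by the caller.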
2922 SMLoc EndLoc; 2923 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2924 return MatchOperand_ParseFail; 2925 } else { 2926 if (Parser.parseExpression(Expr)) 2927 return MatchOperand_ParseFail; 2928 } 2929 2930 if (Expr->evaluateAsAbsolute(IntVal)) { 2931 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2932 } else { 2933 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2934 } 2935 2936 return MatchOperand_Success; 2937 } 2938 2939 return MatchOperand_NoMatch; 2940 } 2941 2942 OperandMatchResultTy 2943 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2944 if (!isRegister()) 2945 return MatchOperand_NoMatch; 2946 2947 if (auto R = parseRegister()) { 2948 assert(R->isReg()); 2949 Operands.push_back(std::move(R)); 2950 return MatchOperand_Success; 2951 } 2952 return MatchOperand_ParseFail; 2953 } 2954 2955 OperandMatchResultTy 2956 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2957 auto res = parseReg(Operands); 2958 if (res != MatchOperand_NoMatch) { 2959 return res; 2960 } else if (isModifier()) { 2961 return MatchOperand_NoMatch; 2962 } else { 2963 return parseImm(Operands, HasSP3AbsMod); 2964 } 2965 } 2966 2967 bool 2968 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2969 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2970 const auto &str = Token.getString(); 2971 return str == "abs" || str == "neg" || str == "sext"; 2972 } 2973 return false; 2974 } 2975 2976 bool 2977 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2978 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2979 } 2980 2981 bool 2982 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2983 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2984 } 2985 2986 bool 2987 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2988 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2989 } 2990 2991 // Check if this is an operand modifier or an opcode modifier 2992 // which may look like an expression but it is not. We should 2993 // avoid parsing these modifiers as expressions. Currently 2994 // recognized sequences are: 2995 // |...| 2996 // abs(...) 2997 // neg(...) 2998 // sext(...) 2999 // -reg 3000 // -|...| 3001 // -abs(...) 3002 // name:... 3003 // Note that simple opcode modifiers like 'gds' may be parsed as 3004 // expressions; this is a special case. See getExpressionAsToken. 3005 // 3006 bool 3007 AMDGPUAsmParser::isModifier() { 3008 3009 AsmToken Tok = getToken(); 3010 AsmToken NextToken[2]; 3011 peekTokens(NextToken); 3012 3013 return isOperandModifier(Tok, NextToken[0]) || 3014 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3015 isOpcodeModifierWithVal(Tok, NextToken[0]); 3016 } 3017 3018 // Check if the current token is an SP3 'neg' modifier. 3019 // Currently this modifier is allowed in the following context: 3020 // 3021 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3022 // 2. Before an 'abs' modifier: -abs(...) 3023 // 3. Before an SP3 'abs' modifier: -|...| 3024 // 3025 // In all other cases "-" is handled as a part 3026 // of an expression that follows the sign. 
3027 //
3028 // Note: When "-" is followed by an integer literal,
3029 // this is interpreted as integer negation rather
3030 // than a floating-point NEG modifier applied to N.
3031 // Besides being counter-intuitive, such use of a floating-point
3032 // NEG modifier would have resulted in a different meaning
3033 // of integer literals used with VOP1/2/C and VOP3,
3034 // for example:
3035 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3036 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3037 // Negative fp literals with a preceding "-" are
3038 // handled likewise for uniformity.
3039 //
3040 bool
3041 AMDGPUAsmParser::parseSP3NegModifier() {
3042
3043 AsmToken NextToken[2];
3044 peekTokens(NextToken);
3045
3046 if (isToken(AsmToken::Minus) &&
3047 (isRegister(NextToken[0], NextToken[1]) ||
3048 NextToken[0].is(AsmToken::Pipe) ||
3049 isId(NextToken[0], "abs"))) {
3050 lex();
3051 return true;
3052 }
3053
3054 return false;
3055 }
3056
3057 OperandMatchResultTy
3058 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3059 bool AllowImm) {
3060 bool Neg, SP3Neg;
3061 bool Abs, SP3Abs;
3062 SMLoc Loc;
3063
3064 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3065 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3066 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3067 return MatchOperand_ParseFail;
3068 }
3069
3070 SP3Neg = parseSP3NegModifier();
3071
3072 Loc = getLoc();
3073 Neg = trySkipId("neg");
3074 if (Neg && SP3Neg) {
3075 Error(Loc, "expected register or immediate");
3076 return MatchOperand_ParseFail;
3077 }
3078 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3079 return MatchOperand_ParseFail;
3080
3081 Abs = trySkipId("abs");
3082 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3083 return MatchOperand_ParseFail;
3084
3085 Loc = getLoc();
3086 SP3Abs = trySkipToken(AsmToken::Pipe);
3087 if (Abs && SP3Abs) {
3088 Error(Loc, "expected register or immediate");
3089 return MatchOperand_ParseFail;
3090 }
3091
3092 OperandMatchResultTy Res;
3093 if (AllowImm) {
3094 Res = parseRegOrImm(Operands, SP3Abs);
3095 } else {
3096 Res = parseReg(Operands);
3097 }
3098 if (Res != MatchOperand_Success) {
3099 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3100 } 3101 3102 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3103 return MatchOperand_ParseFail; 3104 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3105 return MatchOperand_ParseFail; 3106 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3107 return MatchOperand_ParseFail; 3108 3109 AMDGPUOperand::Modifiers Mods; 3110 Mods.Abs = Abs || SP3Abs; 3111 Mods.Neg = Neg || SP3Neg; 3112 3113 if (Mods.hasFPModifiers()) { 3114 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3115 if (Op.isExpr()) { 3116 Error(Op.getStartLoc(), "expected an absolute expression"); 3117 return MatchOperand_ParseFail; 3118 } 3119 Op.setModifiers(Mods); 3120 } 3121 return MatchOperand_Success; 3122 } 3123 3124 OperandMatchResultTy 3125 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3126 bool AllowImm) { 3127 bool Sext = trySkipId("sext"); 3128 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3129 return MatchOperand_ParseFail; 3130 3131 OperandMatchResultTy Res; 3132 if (AllowImm) { 3133 Res = parseRegOrImm(Operands); 3134 } else { 3135 Res = parseReg(Operands); 3136 } 3137 if (Res != MatchOperand_Success) { 3138 return Sext? MatchOperand_ParseFail : Res; 3139 } 3140 3141 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3142 return MatchOperand_ParseFail; 3143 3144 AMDGPUOperand::Modifiers Mods; 3145 Mods.Sext = Sext; 3146 3147 if (Mods.hasIntModifiers()) { 3148 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3149 if (Op.isExpr()) { 3150 Error(Op.getStartLoc(), "expected an absolute expression"); 3151 return MatchOperand_ParseFail; 3152 } 3153 Op.setModifiers(Mods); 3154 } 3155 3156 return MatchOperand_Success; 3157 } 3158 3159 OperandMatchResultTy 3160 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3161 return parseRegOrImmWithFPInputMods(Operands, false); 3162 } 3163 3164 OperandMatchResultTy 3165 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3166 return parseRegOrImmWithIntInputMods(Operands, false); 3167 } 3168 3169 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3170 auto Loc = getLoc(); 3171 if (trySkipId("off")) { 3172 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3173 AMDGPUOperand::ImmTyOff, false)); 3174 return MatchOperand_Success; 3175 } 3176 3177 if (!isRegister()) 3178 return MatchOperand_NoMatch; 3179 3180 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3181 if (Reg) { 3182 Operands.push_back(std::move(Reg)); 3183 return MatchOperand_Success; 3184 } 3185 3186 return MatchOperand_ParseFail; 3187 3188 } 3189 3190 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3191 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3192 3193 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3194 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3195 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3196 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3197 return Match_InvalidOperand; 3198 3199 if ((TSFlags & SIInstrFlags::VOP3) && 3200 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3201 getForcedEncodingSize() != 64) 3202 return Match_PreferE32; 3203 3204 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3205 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3206 // v_mac_f32/16 allow only dst_sel == DWORD; 3207 auto OpNum = 3208 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3209 const auto &Op = Inst.getOperand(OpNum);
3210 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3211 return Match_InvalidOperand;
3212 }
3213 }
3214
3215 return Match_Success;
3216 }
3217
3218 static ArrayRef<unsigned> getAllVariants() {
3219 static const unsigned Variants[] = {
3220 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3221 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3222 };
3223
3224 return makeArrayRef(Variants);
3225 }
3226
3227 // Which asm variants we should check.
3228 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3229 if (getForcedEncodingSize() == 32) {
3230 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3231 return makeArrayRef(Variants);
3232 }
3233
3234 if (isForcedVOP3()) {
3235 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3236 return makeArrayRef(Variants);
3237 }
3238
3239 if (isForcedSDWA()) {
3240 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3241 AMDGPUAsmVariants::SDWA9};
3242 return makeArrayRef(Variants);
3243 }
3244
3245 if (isForcedDPP()) {
3246 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3247 return makeArrayRef(Variants);
3248 }
3249
3250 return getAllVariants();
3251 }
3252
3253 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3254 if (getForcedEncodingSize() == 32)
3255 return "e32";
3256
3257 if (isForcedVOP3())
3258 return "e64";
3259
3260 if (isForcedSDWA())
3261 return "sdwa";
3262
3263 if (isForcedDPP())
3264 return "dpp";
3265
3266 return "";
3267 }
3268
3269 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3270 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3271 const unsigned Num = Desc.getNumImplicitUses();
3272 for (unsigned i = 0; i < Num; ++i) {
3273 unsigned Reg = Desc.ImplicitUses[i];
3274 switch (Reg) {
3275 case AMDGPU::FLAT_SCR:
3276 case AMDGPU::VCC:
3277 case AMDGPU::VCC_LO:
3278 case AMDGPU::VCC_HI:
3279 case AMDGPU::M0:
3280 return Reg;
3281 default:
3282 break;
3283 }
3284 }
3285 return AMDGPU::NoRegister;
3286 }
3287
3288 // NB: This code is correct only when used to check constant
3289 // bus limitations because GFX7 supports no f16 inline constants.
3290 // Note that there are no cases when a GFX7 opcode violates
3291 // constant bus limitations due to the use of an f16 constant.
3292 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3293 unsigned OpIdx) const { 3294 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3295 3296 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3297 return false; 3298 } 3299 3300 const MCOperand &MO = Inst.getOperand(OpIdx); 3301 3302 int64_t Val = MO.getImm(); 3303 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3304 3305 switch (OpSize) { // expected operand size 3306 case 8: 3307 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3308 case 4: 3309 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3310 case 2: { 3311 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3312 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3313 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3314 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3315 return AMDGPU::isInlinableIntLiteral(Val); 3316 3317 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3318 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3319 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3320 return AMDGPU::isInlinableIntLiteralV216(Val); 3321 3322 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3323 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3324 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3325 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3326 3327 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3328 } 3329 default: 3330 llvm_unreachable("invalid operand size"); 3331 } 3332 } 3333 3334 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3335 if (!isGFX10Plus()) 3336 return 1; 3337 3338 switch (Opcode) { 3339 // 64-bit shift instructions can use only one scalar value input 3340 case AMDGPU::V_LSHLREV_B64_e64: 3341 case AMDGPU::V_LSHLREV_B64_gfx10: 3342 case AMDGPU::V_LSHRREV_B64_e64: 3343 case AMDGPU::V_LSHRREV_B64_gfx10: 3344 case AMDGPU::V_ASHRREV_I64_e64: 3345 case AMDGPU::V_ASHRREV_I64_gfx10: 3346 case AMDGPU::V_LSHL_B64_e64: 3347 case AMDGPU::V_LSHR_B64_e64: 3348 case AMDGPU::V_ASHR_I64_e64: 3349 return 1; 3350 default: 3351 return 2; 3352 } 3353 } 3354 3355 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3356 const MCOperand &MO = Inst.getOperand(OpIdx); 3357 if (MO.isImm()) { 3358 return !isInlineConstant(Inst, OpIdx); 3359 } else if (MO.isReg()) { 3360 auto Reg = MO.getReg(); 3361 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3362 auto PReg = mc2PseudoReg(Reg); 3363 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3364 } else { 3365 return true; 3366 } 3367 } 3368 3369 bool 3370 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3371 const OperandVector &Operands) { 3372 const unsigned Opcode = Inst.getOpcode(); 3373 const MCInstrDesc &Desc = MII.get(Opcode); 3374 unsigned LastSGPR = AMDGPU::NoRegister; 3375 unsigned ConstantBusUseCount = 0; 3376 unsigned NumLiterals = 0; 3377 unsigned LiteralSize; 3378 3379 if (Desc.TSFlags & 3380 (SIInstrFlags::VOPC | 3381 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3382 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3383 SIInstrFlags::SDWA)) { 3384 // Check special imm operands (used by madmk, etc) 3385 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3386 ++NumLiterals; 3387 LiteralSize = 4; 3388 } 3389 3390 SmallDenseSet<unsigned> SGPRsUsed; 3391 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3392 if (SGPRUsed != AMDGPU::NoRegister) { 3393 SGPRsUsed.insert(SGPRUsed); 3394 ++ConstantBusUseCount; 3395 } 3396 3397 
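// Now scan the explicit source operands. Each distinct SGPR adds to the
// constant bus usage; literals are accounted for after the loop.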
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3398 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3399 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3400
3401 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3402
3403 for (int OpIdx : OpIndices) {
3404 if (OpIdx == -1) break;
3405
3406 const MCOperand &MO = Inst.getOperand(OpIdx);
3407 if (usesConstantBus(Inst, OpIdx)) {
3408 if (MO.isReg()) {
3409 LastSGPR = mc2PseudoReg(MO.getReg());
3410 // Pairs of registers with partial intersections like these:
3411 // s0, s[0:1]
3412 // flat_scratch_lo, flat_scratch
3413 // flat_scratch_lo, flat_scratch_hi
3414 // are theoretically valid but they are disabled anyway.
3415 // Note that this code mimics SIInstrInfo::verifyInstruction
3416 if (!SGPRsUsed.count(LastSGPR)) {
3417 SGPRsUsed.insert(LastSGPR);
3418 ++ConstantBusUseCount;
3419 }
3420 } else { // Expression or a literal
3421
3422 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3423 continue; // special operand like VINTERP attr_chan
3424
3425 // An instruction may use only one literal.
3426 // This has been validated in the previous step.
3427 // See validateVOPLiteral.
3428 // This literal may be used as more than one operand.
3429 // If all these operands are of the same size,
3430 // this literal counts as one scalar value.
3431 // Otherwise it counts as 2 scalar values.
3432 // See "GFX10 Shader Programming", section 3.6.2.3.
3433
3434 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3435 if (Size < 4) Size = 4;
3436
3437 if (NumLiterals == 0) {
3438 NumLiterals = 1;
3439 LiteralSize = Size;
3440 } else if (LiteralSize != Size) {
3441 NumLiterals = 2;
3442 }
3443 }
3444 }
3445 }
3446 }
3447 ConstantBusUseCount += NumLiterals;
3448
3449 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3450 return true;
3451
3452 SMLoc LitLoc = getLitLoc(Operands);
3453 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3454 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3455 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3456 return false; 3457 } 3458 3459 bool 3460 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3461 const OperandVector &Operands) { 3462 const unsigned Opcode = Inst.getOpcode(); 3463 const MCInstrDesc &Desc = MII.get(Opcode); 3464 3465 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3466 if (DstIdx == -1 || 3467 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3468 return true; 3469 } 3470 3471 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3472 3473 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3474 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3475 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3476 3477 assert(DstIdx != -1); 3478 const MCOperand &Dst = Inst.getOperand(DstIdx); 3479 assert(Dst.isReg()); 3480 3481 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3482 3483 for (int SrcIdx : SrcIndices) { 3484 if (SrcIdx == -1) break; 3485 const MCOperand &Src = Inst.getOperand(SrcIdx); 3486 if (Src.isReg()) { 3487 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3488 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3489 Error(getRegLoc(SrcReg, Operands), 3490 "destination must be different than all sources"); 3491 return false; 3492 } 3493 } 3494 } 3495 3496 return true; 3497 } 3498 3499 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3500 3501 const unsigned Opc = Inst.getOpcode(); 3502 const MCInstrDesc &Desc = MII.get(Opc); 3503 3504 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3505 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3506 assert(ClampIdx != -1); 3507 return Inst.getOperand(ClampIdx).getImm() == 0; 3508 } 3509 3510 return true; 3511 } 3512 3513 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3514 3515 const unsigned Opc = Inst.getOpcode(); 3516 const MCInstrDesc &Desc = MII.get(Opc); 3517 3518 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3519 return None; 3520 3521 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3522 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3523 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3524 3525 assert(VDataIdx != -1); 3526 3527 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3528 return None; 3529 3530 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3531 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3532 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3533 if (DMask == 0) 3534 DMask = 1; 3535 3536 bool isPackedD16 = false; 3537 unsigned DataSize = 3538 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3539 if (hasPackedD16()) { 3540 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3541 isPackedD16 = D16Idx >= 0; 3542 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3543 DataSize = (DataSize + 1) / 2; 3544 } 3545 3546 if ((VDataSize / 4) == DataSize + TFESize) 3547 return None; 3548 3549 return StringRef(isPackedD16 3550 ? 
"image data size does not match dmask, d16 and tfe" 3551 : "image data size does not match dmask and tfe"); 3552 } 3553 3554 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3555 const unsigned Opc = Inst.getOpcode(); 3556 const MCInstrDesc &Desc = MII.get(Opc); 3557 3558 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3559 return true; 3560 3561 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3562 3563 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3564 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3565 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3566 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3567 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3568 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3569 3570 assert(VAddr0Idx != -1); 3571 assert(SrsrcIdx != -1); 3572 assert(SrsrcIdx > VAddr0Idx); 3573 3574 if (DimIdx == -1) 3575 return true; // intersect_ray 3576 3577 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3578 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3579 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3580 unsigned ActualAddrSize = 3581 IsNSA ? SrsrcIdx - VAddr0Idx 3582 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3583 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3584 3585 unsigned ExpectedAddrSize = 3586 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3587 3588 if (!IsNSA) { 3589 if (ExpectedAddrSize > 8) 3590 ExpectedAddrSize = 16; 3591 3592 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3593 // This provides backward compatibility for assembly created 3594 // before 160b/192b/224b types were directly supported. 3595 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3596 return true; 3597 } 3598 3599 return ActualAddrSize == ExpectedAddrSize; 3600 } 3601 3602 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3603 3604 const unsigned Opc = Inst.getOpcode(); 3605 const MCInstrDesc &Desc = MII.get(Opc); 3606 3607 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3608 return true; 3609 if (!Desc.mayLoad() || !Desc.mayStore()) 3610 return true; // Not atomic 3611 3612 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3613 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3614 3615 // This is an incomplete check because image_atomic_cmpswap 3616 // may only use 0x3 and 0xf while other atomic operations 3617 // may use 0x1 and 0x3. However these limitations are 3618 // verified when we check that dmask matches dst size. 3619 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3620 } 3621 3622 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3623 3624 const unsigned Opc = Inst.getOpcode(); 3625 const MCInstrDesc &Desc = MII.get(Opc); 3626 3627 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3628 return true; 3629 3630 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3631 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3632 3633 // GATHER4 instructions use dmask in a different fashion compared to 3634 // other MIMG instructions. The only useful DMASK values are 3635 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3636 // (red,red,red,red) etc.) The ISA document doesn't mention 3637 // this. 
3638 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3639 } 3640 3641 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3642 const unsigned Opc = Inst.getOpcode(); 3643 const MCInstrDesc &Desc = MII.get(Opc); 3644 3645 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3646 return true; 3647 3648 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3649 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3650 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3651 3652 if (!BaseOpcode->MSAA) 3653 return true; 3654 3655 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3656 assert(DimIdx != -1); 3657 3658 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3659 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3660 3661 return DimInfo->MSAA; 3662 } 3663 3664 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3665 { 3666 switch (Opcode) { 3667 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3668 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3669 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3670 return true; 3671 default: 3672 return false; 3673 } 3674 } 3675 3676 // movrels* opcodes should only allow VGPRS as src0. 3677 // This is specified in .td description for vop1/vop3, 3678 // but sdwa is handled differently. See isSDWAOperand. 3679 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3680 const OperandVector &Operands) { 3681 3682 const unsigned Opc = Inst.getOpcode(); 3683 const MCInstrDesc &Desc = MII.get(Opc); 3684 3685 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3686 return true; 3687 3688 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3689 assert(Src0Idx != -1); 3690 3691 SMLoc ErrLoc; 3692 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3693 if (Src0.isReg()) { 3694 auto Reg = mc2PseudoReg(Src0.getReg()); 3695 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3696 if (!isSGPR(Reg, TRI)) 3697 return true; 3698 ErrLoc = getRegLoc(Reg, Operands); 3699 } else { 3700 ErrLoc = getConstLoc(Operands); 3701 } 3702 3703 Error(ErrLoc, "source operand must be a VGPR"); 3704 return false; 3705 } 3706 3707 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3708 const OperandVector &Operands) { 3709 3710 const unsigned Opc = Inst.getOpcode(); 3711 3712 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3713 return true; 3714 3715 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3716 assert(Src0Idx != -1); 3717 3718 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3719 if (!Src0.isReg()) 3720 return true; 3721 3722 auto Reg = mc2PseudoReg(Src0.getReg()); 3723 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3724 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3725 Error(getRegLoc(Reg, Operands), 3726 "source operand must be either a VGPR or an inline constant"); 3727 return false; 3728 } 3729 3730 return true; 3731 } 3732 3733 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3734 const OperandVector &Operands) { 3735 const unsigned Opc = Inst.getOpcode(); 3736 const MCInstrDesc &Desc = MII.get(Opc); 3737 3738 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3739 return true; 3740 3741 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3742 if (Src2Idx == -1) 3743 return true; 3744 3745 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3746 if (!Src2.isReg()) 3747 return true; 3748 3749 MCRegister Src2Reg = Src2.getReg(); 3750 MCRegister DstReg = Inst.getOperand(0).getReg(); 3751 if (Src2Reg == DstReg) 3752 return 
true; 3753 3754 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3755 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3756 return true; 3757 3758 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3759 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3760 "source 2 operand must not partially overlap with dst"); 3761 return false; 3762 } 3763 3764 return true; 3765 } 3766 3767 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3768 switch (Inst.getOpcode()) { 3769 default: 3770 return true; 3771 case V_DIV_SCALE_F32_gfx6_gfx7: 3772 case V_DIV_SCALE_F32_vi: 3773 case V_DIV_SCALE_F32_gfx10: 3774 case V_DIV_SCALE_F64_gfx6_gfx7: 3775 case V_DIV_SCALE_F64_vi: 3776 case V_DIV_SCALE_F64_gfx10: 3777 break; 3778 } 3779 3780 // TODO: Check that src0 = src1 or src2. 3781 3782 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3783 AMDGPU::OpName::src1_modifiers, 3784 AMDGPU::OpName::src2_modifiers}) { 3785 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3786 .getImm() & 3787 SISrcMods::ABS) { 3788 return false; 3789 } 3790 } 3791 3792 return true; 3793 } 3794 3795 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3796 3797 const unsigned Opc = Inst.getOpcode(); 3798 const MCInstrDesc &Desc = MII.get(Opc); 3799 3800 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3801 return true; 3802 3803 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3804 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3805 if (isCI() || isSI()) 3806 return false; 3807 } 3808 3809 return true; 3810 } 3811 3812 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3813 const unsigned Opc = Inst.getOpcode(); 3814 const MCInstrDesc &Desc = MII.get(Opc); 3815 3816 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3817 return true; 3818 3819 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3820 if (DimIdx < 0) 3821 return true; 3822 3823 long Imm = Inst.getOperand(DimIdx).getImm(); 3824 if (Imm < 0 || Imm >= 8) 3825 return false; 3826 3827 return true; 3828 } 3829 3830 static bool IsRevOpcode(const unsigned Opcode) 3831 { 3832 switch (Opcode) { 3833 case AMDGPU::V_SUBREV_F32_e32: 3834 case AMDGPU::V_SUBREV_F32_e64: 3835 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3836 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3837 case AMDGPU::V_SUBREV_F32_e32_vi: 3838 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3839 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3840 case AMDGPU::V_SUBREV_F32_e64_vi: 3841 3842 case AMDGPU::V_SUBREV_CO_U32_e32: 3843 case AMDGPU::V_SUBREV_CO_U32_e64: 3844 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3845 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3846 3847 case AMDGPU::V_SUBBREV_U32_e32: 3848 case AMDGPU::V_SUBBREV_U32_e64: 3849 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3850 case AMDGPU::V_SUBBREV_U32_e32_vi: 3851 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3852 case AMDGPU::V_SUBBREV_U32_e64_vi: 3853 3854 case AMDGPU::V_SUBREV_U32_e32: 3855 case AMDGPU::V_SUBREV_U32_e64: 3856 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3857 case AMDGPU::V_SUBREV_U32_e32_vi: 3858 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3859 case AMDGPU::V_SUBREV_U32_e64_vi: 3860 3861 case AMDGPU::V_SUBREV_F16_e32: 3862 case AMDGPU::V_SUBREV_F16_e64: 3863 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3864 case AMDGPU::V_SUBREV_F16_e32_vi: 3865 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3866 case AMDGPU::V_SUBREV_F16_e64_vi: 3867 3868 case AMDGPU::V_SUBREV_U16_e32: 3869 case AMDGPU::V_SUBREV_U16_e64: 3870 case AMDGPU::V_SUBREV_U16_e32_vi: 3871 case AMDGPU::V_SUBREV_U16_e64_vi: 3872 3873 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3874 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3875 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3876 3877 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3878 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3879 3880 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3881 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3882 3883 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3884 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3885 3886 case AMDGPU::V_LSHRREV_B32_e32: 3887 case AMDGPU::V_LSHRREV_B32_e64: 3888 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3889 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3890 case AMDGPU::V_LSHRREV_B32_e32_vi: 3891 case AMDGPU::V_LSHRREV_B32_e64_vi: 3892 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3893 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3894 3895 case AMDGPU::V_ASHRREV_I32_e32: 3896 case AMDGPU::V_ASHRREV_I32_e64: 3897 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3898 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3899 case AMDGPU::V_ASHRREV_I32_e32_vi: 3900 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3901 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3902 case AMDGPU::V_ASHRREV_I32_e64_vi: 3903 3904 case AMDGPU::V_LSHLREV_B32_e32: 3905 case AMDGPU::V_LSHLREV_B32_e64: 3906 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3907 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3908 case AMDGPU::V_LSHLREV_B32_e32_vi: 3909 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3910 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3911 case AMDGPU::V_LSHLREV_B32_e64_vi: 3912 3913 case AMDGPU::V_LSHLREV_B16_e32: 3914 case AMDGPU::V_LSHLREV_B16_e64: 3915 case AMDGPU::V_LSHLREV_B16_e32_vi: 3916 case AMDGPU::V_LSHLREV_B16_e64_vi: 3917 case AMDGPU::V_LSHLREV_B16_gfx10: 3918 3919 case AMDGPU::V_LSHRREV_B16_e32: 3920 case AMDGPU::V_LSHRREV_B16_e64: 3921 case AMDGPU::V_LSHRREV_B16_e32_vi: 3922 case AMDGPU::V_LSHRREV_B16_e64_vi: 3923 case AMDGPU::V_LSHRREV_B16_gfx10: 3924 3925 case AMDGPU::V_ASHRREV_I16_e32: 3926 case AMDGPU::V_ASHRREV_I16_e64: 3927 case AMDGPU::V_ASHRREV_I16_e32_vi: 3928 case AMDGPU::V_ASHRREV_I16_e64_vi: 3929 case AMDGPU::V_ASHRREV_I16_gfx10: 3930 3931 case AMDGPU::V_LSHLREV_B64_e64: 3932 case AMDGPU::V_LSHLREV_B64_gfx10: 3933 case AMDGPU::V_LSHLREV_B64_vi: 3934 3935 case AMDGPU::V_LSHRREV_B64_e64: 3936 case AMDGPU::V_LSHRREV_B64_gfx10: 3937 case AMDGPU::V_LSHRREV_B64_vi: 3938 3939 case AMDGPU::V_ASHRREV_I64_e64: 3940 case AMDGPU::V_ASHRREV_I64_gfx10: 3941 case AMDGPU::V_ASHRREV_I64_vi: 3942 3943 case AMDGPU::V_PK_LSHLREV_B16: 3944 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3945 case AMDGPU::V_PK_LSHLREV_B16_vi: 3946 3947 case AMDGPU::V_PK_LSHRREV_B16: 3948 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3949 case AMDGPU::V_PK_LSHRREV_B16_vi: 3950 case AMDGPU::V_PK_ASHRREV_I16: 3951 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3952 case AMDGPU::V_PK_ASHRREV_I16_vi: 3953 return true; 3954 default: 3955 return false; 3956 } 3957 } 3958 3959 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3960 3961 using namespace SIInstrFlags; 3962 const unsigned Opcode = Inst.getOpcode(); 3963 const MCInstrDesc &Desc = MII.get(Opcode); 3964 3965 // lds_direct register is defined so that it can be used 3966 // with 9-bit operands only. Ignore encodings which do not accept these. 
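// Illustrative example (assumed syntax, not taken from the tests): an operand
// spelled "lds_direct", e.g. something like "v_add_f32 v0, lds_direct, v1",
// is only meaningful as src0 on targets that still support it; the checks
// below reject SDWA, *rev opcodes, and non-src0 uses.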
3967 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3968 if ((Desc.TSFlags & Enc) == 0) 3969 return None; 3970 3971 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3972 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3973 if (SrcIdx == -1) 3974 break; 3975 const auto &Src = Inst.getOperand(SrcIdx); 3976 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3977 3978 if (isGFX90A() || isGFX11Plus()) 3979 return StringRef("lds_direct is not supported on this GPU"); 3980 3981 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3982 return StringRef("lds_direct cannot be used with this instruction"); 3983 3984 if (SrcName != OpName::src0) 3985 return StringRef("lds_direct may be used as src0 only"); 3986 } 3987 } 3988 3989 return None; 3990 } 3991 3992 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3993 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3994 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3995 if (Op.isFlatOffset()) 3996 return Op.getStartLoc(); 3997 } 3998 return getLoc(); 3999 } 4000 4001 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 4002 const OperandVector &Operands) { 4003 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4004 if ((TSFlags & SIInstrFlags::FLAT) == 0) 4005 return true; 4006 4007 auto Opcode = Inst.getOpcode(); 4008 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4009 assert(OpNum != -1); 4010 4011 const auto &Op = Inst.getOperand(OpNum); 4012 if (!hasFlatOffsets() && Op.getImm() != 0) { 4013 Error(getFlatOffsetLoc(Operands), 4014 "flat offset modifier is not supported on this GPU"); 4015 return false; 4016 } 4017 4018 // For FLAT segment the offset must be positive; 4019 // MSB is ignored and forced to zero. 4020 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4021 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4022 if (!isIntN(OffsetSize, Op.getImm())) { 4023 Error(getFlatOffsetLoc(Operands), 4024 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4025 return false; 4026 } 4027 } else { 4028 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4029 if (!isUIntN(OffsetSize, Op.getImm())) { 4030 Error(getFlatOffsetLoc(Operands), 4031 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4032 return false; 4033 } 4034 } 4035 4036 return true; 4037 } 4038 4039 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4040 // Start with second operand because SMEM Offset cannot be dst or src0. 
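// Illustrative example: for "s_load_dword s0, s[2:3], 0x10" the offset token
// is parsed after the destination, so starting the scan at index 2 is
// sufficient.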
4041 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4042 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4043 if (Op.isSMEMOffset()) 4044 return Op.getStartLoc(); 4045 } 4046 return getLoc(); 4047 } 4048 4049 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4050 const OperandVector &Operands) { 4051 if (isCI() || isSI()) 4052 return true; 4053 4054 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4055 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4056 return true; 4057 4058 auto Opcode = Inst.getOpcode(); 4059 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4060 if (OpNum == -1) 4061 return true; 4062 4063 const auto &Op = Inst.getOperand(OpNum); 4064 if (!Op.isImm()) 4065 return true; 4066 4067 uint64_t Offset = Op.getImm(); 4068 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4069 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4070 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4071 return true; 4072 4073 Error(getSMEMOffsetLoc(Operands), 4074 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4075 "expected a 21-bit signed offset"); 4076 4077 return false; 4078 } 4079 4080 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4081 unsigned Opcode = Inst.getOpcode(); 4082 const MCInstrDesc &Desc = MII.get(Opcode); 4083 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4084 return true; 4085 4086 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4087 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4088 4089 const int OpIndices[] = { Src0Idx, Src1Idx }; 4090 4091 unsigned NumExprs = 0; 4092 unsigned NumLiterals = 0; 4093 uint32_t LiteralValue; 4094 4095 for (int OpIdx : OpIndices) { 4096 if (OpIdx == -1) break; 4097 4098 const MCOperand &MO = Inst.getOperand(OpIdx); 4099 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4100 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4101 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4102 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4103 if (NumLiterals == 0 || LiteralValue != Value) { 4104 LiteralValue = Value; 4105 ++NumLiterals; 4106 } 4107 } else if (MO.isExpr()) { 4108 ++NumExprs; 4109 } 4110 } 4111 } 4112 4113 return NumLiterals + NumExprs <= 1; 4114 } 4115 4116 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4117 const unsigned Opc = Inst.getOpcode(); 4118 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4119 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4120 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4121 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4122 4123 if (OpSel & ~3) 4124 return false; 4125 } 4126 4127 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4128 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4129 if (OpSelIdx != -1) { 4130 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4131 return false; 4132 } 4133 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4134 if (OpSelHiIdx != -1) { 4135 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4136 return false; 4137 } 4138 } 4139 4140 return true; 4141 } 4142 4143 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4144 const OperandVector &Operands) { 4145 const unsigned Opc = Inst.getOpcode(); 4146 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4147 if (DppCtrlIdx < 0) 4148 return true; 4149 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4150 4151 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4152 // DPP64 is supported for row_newbcast only. 4153 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4154 if (Src0Idx >= 0 && 4155 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4156 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4157 Error(S, "64 bit dpp only supports row_newbcast"); 4158 return false; 4159 } 4160 } 4161 4162 return true; 4163 } 4164 4165 // Check if VCC register matches wavefront size 4166 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4167 auto FB = getFeatureBits(); 4168 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4169 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4170 } 4171 4172 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4173 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4174 const OperandVector &Operands) { 4175 unsigned Opcode = Inst.getOpcode(); 4176 const MCInstrDesc &Desc = MII.get(Opcode); 4177 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4178 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4179 ImmIdx == -1) 4180 return true; 4181 4182 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4183 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4184 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4185 4186 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4187 4188 unsigned NumExprs = 0; 4189 unsigned NumLiterals = 0; 4190 uint32_t LiteralValue; 4191 4192 for (int OpIdx : OpIndices) { 4193 if (OpIdx == -1) 4194 continue; 4195 4196 const MCOperand &MO = Inst.getOperand(OpIdx); 4197 if (!MO.isImm() && !MO.isExpr()) 4198 continue; 4199 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4200 continue; 4201 4202 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4203 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4204 Error(getConstLoc(Operands), 4205 "inline constants are not allowed for this operand"); 4206 return false; 4207 } 4208 4209 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4210 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4211 if (NumLiterals == 0 || LiteralValue != Value) { 4212 LiteralValue = Value; 4213 ++NumLiterals; 4214 } 4215 } else if (MO.isExpr()) { 4216 ++NumExprs; 4217 } 4218 } 4219 NumLiterals += NumExprs; 4220 4221 if (!NumLiterals) 4222 return true; 4223 4224 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4225 Error(getLitLoc(Operands), "literal operands are not supported"); 4226 return false; 4227 } 4228 4229 if (NumLiterals > 1) { 4230 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4231 return false; 4232 } 4233 4234 return true; 4235 } 4236 4237 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4238 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4239 const MCRegisterInfo *MRI) { 4240 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4241 if (OpIdx < 0) 4242 return -1; 4243 4244 const MCOperand &Op = Inst.getOperand(OpIdx); 4245 if (!Op.isReg()) 4246 return -1; 4247 4248 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4249 auto Reg = Sub ? Sub : Op.getReg(); 4250 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4251 return AGPR32.contains(Reg) ? 
1 : 0; 4252 } 4253 4254 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4255 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4256 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4257 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4258 SIInstrFlags::DS)) == 0) 4259 return true; 4260 4261 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4262 : AMDGPU::OpName::vdata; 4263 4264 const MCRegisterInfo *MRI = getMRI(); 4265 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4266 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4267 4268 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4269 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4270 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4271 return false; 4272 } 4273 4274 auto FB = getFeatureBits(); 4275 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4276 if (DataAreg < 0 || DstAreg < 0) 4277 return true; 4278 return DstAreg == DataAreg; 4279 } 4280 4281 return DstAreg < 1 && DataAreg < 1; 4282 } 4283 4284 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4285 auto FB = getFeatureBits(); 4286 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4287 return true; 4288 4289 const MCRegisterInfo *MRI = getMRI(); 4290 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4291 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4292 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4293 const MCOperand &Op = Inst.getOperand(I); 4294 if (!Op.isReg()) 4295 continue; 4296 4297 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4298 if (!Sub) 4299 continue; 4300 4301 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4302 return false; 4303 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4304 return false; 4305 } 4306 4307 return true; 4308 } 4309 4310 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4311 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4312 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4313 if (Op.isBLGP()) 4314 return Op.getStartLoc(); 4315 } 4316 return SMLoc(); 4317 } 4318 4319 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4320 const OperandVector &Operands) { 4321 unsigned Opc = Inst.getOpcode(); 4322 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4323 if (BlgpIdx == -1) 4324 return true; 4325 SMLoc BLGPLoc = getBLGPLoc(Operands); 4326 if (!BLGPLoc.isValid()) 4327 return true; 4328 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4329 auto FB = getFeatureBits(); 4330 bool UsesNeg = false; 4331 if (FB[AMDGPU::FeatureGFX940Insts]) { 4332 switch (Opc) { 4333 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4334 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4335 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4336 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4337 UsesNeg = true; 4338 } 4339 } 4340 4341 if (IsNeg == UsesNeg) 4342 return true; 4343 4344 Error(BLGPLoc, 4345 UsesNeg ? "invalid modifier: blgp is not supported" 4346 : "invalid modifier: neg is not supported"); 4347 4348 return false; 4349 } 4350 4351 // gfx90a has an undocumented limitation: 4352 // DS_GWS opcodes must use even aligned registers. 
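// Illustrative example (assumed syntax): something like "ds_gws_init v2 gds"
// uses an even-aligned data VGPR and passes this check on gfx90a, while the
// same instruction with v3 would be rejected below.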
4353 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4354 const OperandVector &Operands) { 4355 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4356 return true; 4357 4358 int Opc = Inst.getOpcode(); 4359 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4360 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4361 return true; 4362 4363 const MCRegisterInfo *MRI = getMRI(); 4364 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4365 int Data0Pos = 4366 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4367 assert(Data0Pos != -1); 4368 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4369 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4370 if (RegIdx & 1) { 4371 SMLoc RegLoc = getRegLoc(Reg, Operands); 4372 Error(RegLoc, "vgpr must be even aligned"); 4373 return false; 4374 } 4375 4376 return true; 4377 } 4378 4379 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4380 const OperandVector &Operands, 4381 const SMLoc &IDLoc) { 4382 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4383 AMDGPU::OpName::cpol); 4384 if (CPolPos == -1) 4385 return true; 4386 4387 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4388 4389 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4390 if (TSFlags & SIInstrFlags::SMRD) { 4391 if (CPol && (isSI() || isCI())) { 4392 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4393 Error(S, "cache policy is not supported for SMRD instructions"); 4394 return false; 4395 } 4396 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4397 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4398 return false; 4399 } 4400 } 4401 4402 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4403 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4404 StringRef CStr(S.getPointer()); 4405 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4406 Error(S, "scc is not supported on this GPU"); 4407 return false; 4408 } 4409 4410 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4411 return true; 4412 4413 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4414 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4415 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4416 : "instruction must use glc"); 4417 return false; 4418 } 4419 } else { 4420 if (CPol & CPol::GLC) { 4421 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4422 StringRef CStr(S.getPointer()); 4423 S = SMLoc::getFromPointer( 4424 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4425 Error(S, isGFX940() ? "instruction must not use sc0" 4426 : "instruction must not use glc"); 4427 return false; 4428 } 4429 } 4430 4431 return true; 4432 } 4433 4434 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4435 const OperandVector &Operands, 4436 const SMLoc &IDLoc) { 4437 if (isGFX940()) 4438 return true; 4439 4440 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4441 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4442 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4443 return true; 4444 // This is FLAT LDS DMA. 4445 4446 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4447 StringRef CStr(S.getPointer()); 4448 if (!CStr.startswith("lds")) { 4449 // This is incorrectly selected LDS DMA version of a FLAT load opcode. 4450 // And LDS version should have 'lds' modifier, but it follows optional 4451 // operands so its absense is ignored by the matcher. 
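// Report a generic operand error here: the user most likely intended the
// plain FLAT form, and asking for the optional 'lds' token would suggest a
// modifier that was almost certainly not meant.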
4452 Error(IDLoc, "invalid operands for instruction"); 4453 return false; 4454 } 4455 4456 return true; 4457 } 4458 4459 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4460 const SMLoc &IDLoc, 4461 const OperandVector &Operands) { 4462 if (auto ErrMsg = validateLdsDirect(Inst)) { 4463 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4464 return false; 4465 } 4466 if (!validateSOPLiteral(Inst)) { 4467 Error(getLitLoc(Operands), 4468 "only one literal operand is allowed"); 4469 return false; 4470 } 4471 if (!validateVOPLiteral(Inst, Operands)) { 4472 return false; 4473 } 4474 if (!validateConstantBusLimitations(Inst, Operands)) { 4475 return false; 4476 } 4477 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4478 return false; 4479 } 4480 if (!validateIntClampSupported(Inst)) { 4481 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4482 "integer clamping is not supported on this GPU"); 4483 return false; 4484 } 4485 if (!validateOpSel(Inst)) { 4486 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4487 "invalid op_sel operand"); 4488 return false; 4489 } 4490 if (!validateDPP(Inst, Operands)) { 4491 return false; 4492 } 4493 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4494 if (!validateMIMGD16(Inst)) { 4495 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4496 "d16 modifier is not supported on this GPU"); 4497 return false; 4498 } 4499 if (!validateMIMGDim(Inst)) { 4500 Error(IDLoc, "dim modifier is required on this GPU"); 4501 return false; 4502 } 4503 if (!validateMIMGMSAA(Inst)) { 4504 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4505 "invalid dim; must be MSAA type"); 4506 return false; 4507 } 4508 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4509 Error(IDLoc, *ErrMsg); 4510 return false; 4511 } 4512 if (!validateMIMGAddrSize(Inst)) { 4513 Error(IDLoc, 4514 "image address size does not match dim and a16"); 4515 return false; 4516 } 4517 if (!validateMIMGAtomicDMask(Inst)) { 4518 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4519 "invalid atomic image dmask"); 4520 return false; 4521 } 4522 if (!validateMIMGGatherDMask(Inst)) { 4523 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4524 "invalid image_gather dmask: only one bit must be set"); 4525 return false; 4526 } 4527 if (!validateMovrels(Inst, Operands)) { 4528 return false; 4529 } 4530 if (!validateFlatOffset(Inst, Operands)) { 4531 return false; 4532 } 4533 if (!validateSMEMOffset(Inst, Operands)) { 4534 return false; 4535 } 4536 if (!validateMAIAccWrite(Inst, Operands)) { 4537 return false; 4538 } 4539 if (!validateMFMA(Inst, Operands)) { 4540 return false; 4541 } 4542 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4543 return false; 4544 } 4545 4546 if (!validateAGPRLdSt(Inst)) { 4547 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4548 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4549 : "invalid register class: agpr loads and stores not supported on this GPU" 4550 ); 4551 return false; 4552 } 4553 if (!validateVGPRAlign(Inst)) { 4554 Error(IDLoc, 4555 "invalid register class: vgpr tuples must be 64 bit aligned"); 4556 return false; 4557 } 4558 if (!validateGWS(Inst, Operands)) { 4559 return false; 4560 } 4561 4562 if (!validateBLGP(Inst, Operands)) { 4563 return false; 4564 } 4565 4566 if (!validateDivScale(Inst)) { 4567 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4568 return false; 4569 } 4570 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4571 return false; 4572 } 4573 4574 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4575 return false; 4576 } 4577 4578 return true; 4579 } 4580 4581 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4582 const FeatureBitset &FBS, 4583 unsigned VariantID = 0); 4584 4585 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4586 const FeatureBitset &AvailableFeatures, 4587 unsigned VariantID); 4588 4589 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4590 const FeatureBitset &FBS) { 4591 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4592 } 4593 4594 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4595 const FeatureBitset &FBS, 4596 ArrayRef<unsigned> Variants) { 4597 for (auto Variant : Variants) { 4598 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4599 return true; 4600 } 4601 4602 return false; 4603 } 4604 4605 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4606 const SMLoc &IDLoc) { 4607 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4608 4609 // Check if requested instruction variant is supported. 4610 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4611 return false; 4612 4613 // This instruction is not supported. 4614 // Clear any other pending errors because they are no longer relevant. 4615 getParser().clearPendingErrors(); 4616 4617 // Requested instruction variant is not supported. 4618 // Check if any other variants are supported. 4619 StringRef VariantName = getMatchedVariantName(); 4620 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4621 return Error(IDLoc, 4622 Twine(VariantName, 4623 " variant of this instruction is not supported")); 4624 } 4625 4626 // Finally check if this instruction is supported on any other GPU. 4627 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4628 return Error(IDLoc, "instruction not supported on this GPU"); 4629 } 4630 4631 // Instruction not supported on any GPU. Probably a typo. 4632 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4633 return Error(IDLoc, "invalid instruction" + Suggestion); 4634 } 4635 4636 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4637 OperandVector &Operands, 4638 MCStreamer &Out, 4639 uint64_t &ErrorInfo, 4640 bool MatchingInlineAsm) { 4641 MCInst Inst; 4642 unsigned Result = Match_Success; 4643 for (auto Variant : getMatchedVariants()) { 4644 uint64_t EI; 4645 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4646 Variant); 4647 // We order match statuses from least to most specific. 
We use most specific 4648 // status as resulting 4649 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4650 if ((R == Match_Success) || 4651 (R == Match_PreferE32) || 4652 (R == Match_MissingFeature && Result != Match_PreferE32) || 4653 (R == Match_InvalidOperand && Result != Match_MissingFeature 4654 && Result != Match_PreferE32) || 4655 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4656 && Result != Match_MissingFeature 4657 && Result != Match_PreferE32)) { 4658 Result = R; 4659 ErrorInfo = EI; 4660 } 4661 if (R == Match_Success) 4662 break; 4663 } 4664 4665 if (Result == Match_Success) { 4666 if (!validateInstruction(Inst, IDLoc, Operands)) { 4667 return true; 4668 } 4669 Inst.setLoc(IDLoc); 4670 Out.emitInstruction(Inst, getSTI()); 4671 return false; 4672 } 4673 4674 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4675 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4676 return true; 4677 } 4678 4679 switch (Result) { 4680 default: break; 4681 case Match_MissingFeature: 4682 // It has been verified that the specified instruction 4683 // mnemonic is valid. A match was found but it requires 4684 // features which are not supported on this GPU. 4685 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4686 4687 case Match_InvalidOperand: { 4688 SMLoc ErrorLoc = IDLoc; 4689 if (ErrorInfo != ~0ULL) { 4690 if (ErrorInfo >= Operands.size()) { 4691 return Error(IDLoc, "too few operands for instruction"); 4692 } 4693 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4694 if (ErrorLoc == SMLoc()) 4695 ErrorLoc = IDLoc; 4696 } 4697 return Error(ErrorLoc, "invalid operand for instruction"); 4698 } 4699 4700 case Match_PreferE32: 4701 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4702 "should be encoded as e32"); 4703 case Match_MnemonicFail: 4704 llvm_unreachable("Invalid instructions should have been handled already"); 4705 } 4706 llvm_unreachable("Implement any new match types added!"); 4707 } 4708 4709 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4710 int64_t Tmp = -1; 4711 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4712 return true; 4713 } 4714 if (getParser().parseAbsoluteExpression(Tmp)) { 4715 return true; 4716 } 4717 Ret = static_cast<uint32_t>(Tmp); 4718 return false; 4719 } 4720 4721 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4722 uint32_t &Minor) { 4723 if (ParseAsAbsoluteExpression(Major)) 4724 return TokError("invalid major version"); 4725 4726 if (!trySkipToken(AsmToken::Comma)) 4727 return TokError("minor version number required, comma expected"); 4728 4729 if (ParseAsAbsoluteExpression(Minor)) 4730 return TokError("invalid minor version"); 4731 4732 return false; 4733 } 4734 4735 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4736 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4737 return TokError("directive only supported for amdgcn architecture"); 4738 4739 std::string TargetIDDirective; 4740 SMLoc TargetStart = getTok().getLoc(); 4741 if (getParser().parseEscapedString(TargetIDDirective)) 4742 return true; 4743 4744 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4745 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4746 return getParser().Error(TargetRange.Start, 4747 (Twine(".amdgcn_target directive's target id ") + 4748 Twine(TargetIDDirective) + 4749 Twine(" does not match the specified target id ") + 4750 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4751 4752 return false; 4753 } 4754 4755 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4756 return Error(Range.Start, "value out of range", Range); 4757 } 4758 4759 bool AMDGPUAsmParser::calculateGPRBlocks( 4760 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4761 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4762 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4763 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4764 // TODO(scott.linder): These calculations are duplicated from 4765 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4766 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4767 4768 unsigned NumVGPRs = NextFreeVGPR; 4769 unsigned NumSGPRs = NextFreeSGPR; 4770 4771 if (Version.Major >= 10) 4772 NumSGPRs = 0; 4773 else { 4774 unsigned MaxAddressableNumSGPRs = 4775 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4776 4777 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4778 NumSGPRs > MaxAddressableNumSGPRs) 4779 return OutOfRangeError(SGPRRange); 4780 4781 NumSGPRs += 4782 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4783 4784 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4785 NumSGPRs > MaxAddressableNumSGPRs) 4786 return OutOfRangeError(SGPRRange); 4787 4788 if (Features.test(FeatureSGPRInitBug)) 4789 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4790 } 4791 4792 VGPRBlocks = 4793 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4794 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4795 4796 return false; 4797 } 4798 4799 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4800 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4801 return TokError("directive only supported for amdgcn architecture"); 4802 4803 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4804 return TokError("directive only supported for amdhsa OS"); 4805 4806 StringRef KernelName; 4807 if (getParser().parseIdentifier(KernelName)) 4808 return true; 4809 4810 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4811 4812 StringSet<> Seen; 4813 4814 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4815 4816 SMRange VGPRRange; 4817 uint64_t NextFreeVGPR = 0; 4818 uint64_t AccumOffset = 0; 4819 uint64_t SharedVGPRCount = 0; 4820 SMRange SGPRRange; 4821 uint64_t NextFreeSGPR = 0; 4822 4823 // Count the number of user SGPRs implied from the enabled feature bits. 4824 unsigned ImpliedUserSGPRCount = 0; 4825 4826 // Track if the asm explicitly contains the directive for the user SGPR 4827 // count. 
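// Illustrative example: enabling only ".amdhsa_user_sgpr_kernarg_segment_ptr 1"
// implies 2 user SGPRs, so an explicit ".amdhsa_user_sgpr_count 1" would be
// rejected further below as smaller than the implied count.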
4828 Optional<unsigned> ExplicitUserSGPRCount; 4829 bool ReserveVCC = true; 4830 bool ReserveFlatScr = true; 4831 Optional<bool> EnableWavefrontSize32; 4832 4833 while (true) { 4834 while (trySkipToken(AsmToken::EndOfStatement)); 4835 4836 StringRef ID; 4837 SMRange IDRange = getTok().getLocRange(); 4838 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4839 return true; 4840 4841 if (ID == ".end_amdhsa_kernel") 4842 break; 4843 4844 if (Seen.find(ID) != Seen.end()) 4845 return TokError(".amdhsa_ directives cannot be repeated"); 4846 Seen.insert(ID); 4847 4848 SMLoc ValStart = getLoc(); 4849 int64_t IVal; 4850 if (getParser().parseAbsoluteExpression(IVal)) 4851 return true; 4852 SMLoc ValEnd = getLoc(); 4853 SMRange ValRange = SMRange(ValStart, ValEnd); 4854 4855 if (IVal < 0) 4856 return OutOfRangeError(ValRange); 4857 4858 uint64_t Val = IVal; 4859 4860 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4861 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4862 return OutOfRangeError(RANGE); \ 4863 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4864 4865 if (ID == ".amdhsa_group_segment_fixed_size") { 4866 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4867 return OutOfRangeError(ValRange); 4868 KD.group_segment_fixed_size = Val; 4869 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4870 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4871 return OutOfRangeError(ValRange); 4872 KD.private_segment_fixed_size = Val; 4873 } else if (ID == ".amdhsa_kernarg_size") { 4874 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4875 return OutOfRangeError(ValRange); 4876 KD.kernarg_size = Val; 4877 } else if (ID == ".amdhsa_user_sgpr_count") { 4878 ExplicitUserSGPRCount = Val; 4879 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4880 if (hasArchitectedFlatScratch()) 4881 return Error(IDRange.Start, 4882 "directive is not supported with architected flat scratch", 4883 IDRange); 4884 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4885 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4886 Val, ValRange); 4887 if (Val) 4888 ImpliedUserSGPRCount += 4; 4889 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4890 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4891 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4892 ValRange); 4893 if (Val) 4894 ImpliedUserSGPRCount += 2; 4895 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4896 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4897 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4898 ValRange); 4899 if (Val) 4900 ImpliedUserSGPRCount += 2; 4901 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4902 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4903 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4904 Val, ValRange); 4905 if (Val) 4906 ImpliedUserSGPRCount += 2; 4907 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4908 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4909 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4910 ValRange); 4911 if (Val) 4912 ImpliedUserSGPRCount += 2; 4913 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4914 if (hasArchitectedFlatScratch()) 4915 return Error(IDRange.Start, 4916 "directive is not supported with architected flat scratch", 4917 IDRange); 4918 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4919 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4920 ValRange); 4921 if (Val) 4922 ImpliedUserSGPRCount += 2; 4923 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4924 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4925 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4926 Val, ValRange); 4927 if (Val) 4928 ImpliedUserSGPRCount += 1; 4929 } else if (ID == ".amdhsa_wavefront_size32") { 4930 if (IVersion.Major < 10) 4931 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4932 EnableWavefrontSize32 = Val; 4933 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4934 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4935 Val, ValRange); 4936 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4937 if (hasArchitectedFlatScratch()) 4938 return Error(IDRange.Start, 4939 "directive is not supported with architected flat scratch", 4940 IDRange); 4941 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4942 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4943 } else if (ID == ".amdhsa_enable_private_segment") { 4944 if (!hasArchitectedFlatScratch()) 4945 return Error( 4946 IDRange.Start, 4947 "directive is not supported without architected flat scratch", 4948 IDRange); 4949 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4950 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4951 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4952 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4953 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4954 ValRange); 4955 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4957 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4958 ValRange); 4959 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4960 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4961 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4962 ValRange); 4963 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4964 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4965 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4966 ValRange); 4967 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4968 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4969 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4970 ValRange); 4971 } else if (ID == ".amdhsa_next_free_vgpr") { 4972 VGPRRange = ValRange; 4973 NextFreeVGPR = Val; 4974 } else if (ID == ".amdhsa_next_free_sgpr") { 4975 SGPRRange = ValRange; 4976 NextFreeSGPR = Val; 4977 } else if (ID == ".amdhsa_accum_offset") { 4978 if (!isGFX90A()) 4979 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4980 AccumOffset = Val; 4981 } else if (ID == ".amdhsa_reserve_vcc") { 4982 if (!isUInt<1>(Val)) 4983 return OutOfRangeError(ValRange); 4984 ReserveVCC = Val; 4985 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4986 if (IVersion.Major < 7) 4987 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4988 if (hasArchitectedFlatScratch()) 4989 return Error(IDRange.Start, 4990 "directive is not supported with architected flat scratch", 4991 IDRange); 4992 if (!isUInt<1>(Val)) 4993 return OutOfRangeError(ValRange); 4994 ReserveFlatScr = Val; 4995 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4996 if (IVersion.Major < 8) 4997 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4998 if (!isUInt<1>(Val)) 4999 return OutOfRangeError(ValRange); 5000 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 5001 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 5002 IDRange); 5003 } else if (ID == ".amdhsa_float_round_mode_32") { 5004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5005 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5006 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5007 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5008 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5009 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5010 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5011 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5012 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5014 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5015 ValRange); 5016 } else if (ID == ".amdhsa_dx10_clamp") { 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5018 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5019 } else if (ID == ".amdhsa_ieee_mode") { 5020 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5021 Val, ValRange); 5022 } else if (ID == ".amdhsa_fp16_overflow") { 5023 if (IVersion.Major < 9) 5024 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5025 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5026 ValRange); 5027 } else if (ID == ".amdhsa_tg_split") { 5028 if (!isGFX90A()) 5029 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5031 ValRange); 5032 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5033 if (IVersion.Major < 10) 5034 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5035 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5036 ValRange); 5037 } else if (ID == ".amdhsa_memory_ordered") { 5038 if (IVersion.Major < 10) 5039 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5040 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5041 ValRange); 5042 } else if (ID == ".amdhsa_forward_progress") { 5043 if (IVersion.Major < 10) 5044 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5045 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5046 ValRange); 5047 } else if (ID == ".amdhsa_shared_vgpr_count") { 5048 if (IVersion.Major < 10) 5049 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5050 SharedVGPRCount = Val; 5051 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5052 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5053 ValRange); 5054 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5055 PARSE_BITS_ENTRY( 5056 KD.compute_pgm_rsrc2, 5057 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5058 ValRange); 5059 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5060 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5061 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5062 Val, ValRange); 5063 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5064 PARSE_BITS_ENTRY( 5065 KD.compute_pgm_rsrc2, 5066 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5067 ValRange); 5068 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5069 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5070 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5071 Val, ValRange); 5072 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5073 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5074 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5075 Val, ValRange); 5076 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5077 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5078 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5079 Val, ValRange); 5080 } else if (ID == ".amdhsa_exception_int_div_zero") { 5081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5082 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5083 Val, ValRange); 5084 } else { 5085 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5086 } 5087 5088 #undef PARSE_BITS_ENTRY 5089 } 5090 5091 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5092 return TokError(".amdhsa_next_free_vgpr directive is required"); 5093 5094 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5095 return TokError(".amdhsa_next_free_sgpr directive is required"); 5096 5097 unsigned VGPRBlocks; 5098 unsigned SGPRBlocks; 5099 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5100 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5101 EnableWavefrontSize32, NextFreeVGPR, 5102 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5103 SGPRBlocks)) 5104 return true; 5105 5106 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5107 VGPRBlocks)) 5108 return OutOfRangeError(VGPRRange); 5109 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5110 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5111 5112 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5113 SGPRBlocks)) 5114 return OutOfRangeError(SGPRRange); 5115 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5116 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5117 SGPRBlocks); 5118 5119 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5120 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5121 "enabled user SGPRs"); 5122 5123 unsigned UserSGPRCount = 5124 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5125 5126 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5127 return TokError("too many user SGPRs enabled"); 5128 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5129 UserSGPRCount); 5130 5131 if (isGFX90A()) { 5132 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5133 return TokError(".amdhsa_accum_offset directive is required"); 5134 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5135 return TokError("accum_offset should be in range [4..256] in " 5136 "increments of 4"); 5137 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5138 return TokError("accum_offset exceeds total VGPR allocation"); 5139 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5140 (AccumOffset / 4 - 1)); 5141 } 5142 5143 if (IVersion.Major == 10) { 5144 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5145 if (SharedVGPRCount && EnableWavefrontSize32) { 5146 return TokError("shared_vgpr_count directive not valid on " 5147 "wavefront size 32"); 5148 } 5149 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5150 return TokError("shared_vgpr_count*2 + " 5151 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5152 "exceed 63\n"); 5153 } 5154 } 5155 5156 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5157 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5158 ReserveFlatScr); 5159 return false; 5160 } 5161 5162 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5163 uint32_t Major; 5164 uint32_t Minor; 5165 5166 if (ParseDirectiveMajorMinor(Major, Minor)) 5167 return true; 5168 5169 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5170 return false; 5171 } 5172 5173 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5174 uint32_t Major; 5175 uint32_t Minor; 5176 uint32_t Stepping; 5177 StringRef VendorName; 5178 StringRef ArchName; 5179 5180 // If this directive has no 
arguments, then use the ISA version for the 5181 // targeted GPU. 5182 if (isToken(AsmToken::EndOfStatement)) { 5183 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5184 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5185 ISA.Stepping, 5186 "AMD", "AMDGPU"); 5187 return false; 5188 } 5189 5190 if (ParseDirectiveMajorMinor(Major, Minor)) 5191 return true; 5192 5193 if (!trySkipToken(AsmToken::Comma)) 5194 return TokError("stepping version number required, comma expected"); 5195 5196 if (ParseAsAbsoluteExpression(Stepping)) 5197 return TokError("invalid stepping version"); 5198 5199 if (!trySkipToken(AsmToken::Comma)) 5200 return TokError("vendor name required, comma expected"); 5201 5202 if (!parseString(VendorName, "invalid vendor name")) 5203 return true; 5204 5205 if (!trySkipToken(AsmToken::Comma)) 5206 return TokError("arch name required, comma expected"); 5207 5208 if (!parseString(ArchName, "invalid arch name")) 5209 return true; 5210 5211 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5212 VendorName, ArchName); 5213 return false; 5214 } 5215 5216 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5217 amd_kernel_code_t &Header) { 5218 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5219 // assembly for backwards compatibility. 5220 if (ID == "max_scratch_backing_memory_byte_size") { 5221 Parser.eatToEndOfStatement(); 5222 return false; 5223 } 5224 5225 SmallString<40> ErrStr; 5226 raw_svector_ostream Err(ErrStr); 5227 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5228 return TokError(Err.str()); 5229 } 5230 Lex(); 5231 5232 if (ID == "enable_wavefront_size32") { 5233 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5234 if (!isGFX10Plus()) 5235 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5236 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5237 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5238 } else { 5239 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5240 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5241 } 5242 } 5243 5244 if (ID == "wavefront_size") { 5245 if (Header.wavefront_size == 5) { 5246 if (!isGFX10Plus()) 5247 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5248 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5249 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5250 } else if (Header.wavefront_size == 6) { 5251 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5252 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5253 } 5254 } 5255 5256 if (ID == "enable_wgp_mode") { 5257 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5258 !isGFX10Plus()) 5259 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5260 } 5261 5262 if (ID == "enable_mem_ordered") { 5263 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5264 !isGFX10Plus()) 5265 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5266 } 5267 5268 if (ID == "enable_fwd_progress") { 5269 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5270 !isGFX10Plus()) 5271 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5272 } 5273 5274 return false; 5275 } 5276 5277 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5278 amd_kernel_code_t Header; 5279 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5280 5281 while (true) { 
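// Each record in the block has the form "<field> = <value>" (illustrative:
// "wavefront_size = 6") and parsing stops at .end_amd_kernel_code_t.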
5282 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5283 // will set the current token to EndOfStatement. 5284 while(trySkipToken(AsmToken::EndOfStatement)); 5285 5286 StringRef ID; 5287 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5288 return true; 5289 5290 if (ID == ".end_amd_kernel_code_t") 5291 break; 5292 5293 if (ParseAMDKernelCodeTValue(ID, Header)) 5294 return true; 5295 } 5296 5297 getTargetStreamer().EmitAMDKernelCodeT(Header); 5298 5299 return false; 5300 } 5301 5302 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5303 StringRef KernelName; 5304 if (!parseId(KernelName, "expected symbol name")) 5305 return true; 5306 5307 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5308 ELF::STT_AMDGPU_HSA_KERNEL); 5309 5310 KernelScope.initialize(getContext()); 5311 return false; 5312 } 5313 5314 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5315 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5316 return Error(getLoc(), 5317 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5318 "architectures"); 5319 } 5320 5321 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5322 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5323 return Error(getParser().getTok().getLoc(), "target id must match options"); 5324 5325 getTargetStreamer().EmitISAVersion(); 5326 Lex(); 5327 5328 return false; 5329 } 5330 5331 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5332 const char *AssemblerDirectiveBegin; 5333 const char *AssemblerDirectiveEnd; 5334 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5335 isHsaAbiVersion3AndAbove(&getSTI()) 5336 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5337 HSAMD::V3::AssemblerDirectiveEnd) 5338 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5339 HSAMD::AssemblerDirectiveEnd); 5340 5341 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5342 return Error(getLoc(), 5343 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5344 "not available on non-amdhsa OSes")).str()); 5345 } 5346 5347 std::string HSAMetadataString; 5348 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5349 HSAMetadataString)) 5350 return true; 5351 5352 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5353 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5354 return Error(getLoc(), "invalid HSA metadata"); 5355 } else { 5356 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5357 return Error(getLoc(), "invalid HSA metadata"); 5358 } 5359 5360 return false; 5361 } 5362 5363 /// Common code to parse out a block of text (typically YAML) between start and 5364 /// end directives. 
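/// For example (illustrative), the YAML between .amdgpu_metadata and
/// .end_amdgpu_metadata is collected statement by statement and returned to
/// the caller unparsed.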
5365 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5366 const char *AssemblerDirectiveEnd, 5367 std::string &CollectString) { 5368 5369 raw_string_ostream CollectStream(CollectString); 5370 5371 getLexer().setSkipSpace(false); 5372 5373 bool FoundEnd = false; 5374 while (!isToken(AsmToken::Eof)) { 5375 while (isToken(AsmToken::Space)) { 5376 CollectStream << getTokenStr(); 5377 Lex(); 5378 } 5379 5380 if (trySkipId(AssemblerDirectiveEnd)) { 5381 FoundEnd = true; 5382 break; 5383 } 5384 5385 CollectStream << Parser.parseStringToEndOfStatement() 5386 << getContext().getAsmInfo()->getSeparatorString(); 5387 5388 Parser.eatToEndOfStatement(); 5389 } 5390 5391 getLexer().setSkipSpace(true); 5392 5393 if (isToken(AsmToken::Eof) && !FoundEnd) { 5394 return TokError(Twine("expected directive ") + 5395 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5396 } 5397 5398 CollectStream.flush(); 5399 return false; 5400 } 5401 5402 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5403 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5404 std::string String; 5405 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5406 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5407 return true; 5408 5409 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5410 if (!PALMetadata->setFromString(String)) 5411 return Error(getLoc(), "invalid PAL metadata"); 5412 return false; 5413 } 5414 5415 /// Parse the assembler directive for old linear-format PAL metadata. 5416 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5417 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5418 return Error(getLoc(), 5419 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5420 "not available on non-amdpal OSes")).str()); 5421 } 5422 5423 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5424 PALMetadata->setLegacy(); 5425 for (;;) { 5426 uint32_t Key, Value; 5427 if (ParseAsAbsoluteExpression(Key)) { 5428 return TokError(Twine("invalid value in ") + 5429 Twine(PALMD::AssemblerDirective)); 5430 } 5431 if (!trySkipToken(AsmToken::Comma)) { 5432 return TokError(Twine("expected an even number of values in ") + 5433 Twine(PALMD::AssemblerDirective)); 5434 } 5435 if (ParseAsAbsoluteExpression(Value)) { 5436 return TokError(Twine("invalid value in ") + 5437 Twine(PALMD::AssemblerDirective)); 5438 } 5439 PALMetadata->setRegister(Key, Value); 5440 if (!trySkipToken(AsmToken::Comma)) 5441 break; 5442 } 5443 return false; 5444 } 5445 5446 /// ParseDirectiveAMDGPULDS 5447 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5448 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5449 if (getParser().checkForValidSection()) 5450 return true; 5451 5452 StringRef Name; 5453 SMLoc NameLoc = getLoc(); 5454 if (getParser().parseIdentifier(Name)) 5455 return TokError("expected identifier in directive"); 5456 5457 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5458 if (parseToken(AsmToken::Comma, "expected ','")) 5459 return true; 5460 5461 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5462 5463 int64_t Size; 5464 SMLoc SizeLoc = getLoc(); 5465 if (getParser().parseAbsoluteExpression(Size)) 5466 return true; 5467 if (Size < 0) 5468 return Error(SizeLoc, "size must be non-negative"); 5469 if (Size > LocalMemorySize) 5470 return Error(SizeLoc, "size is too large"); 5471 5472 int64_t Alignment = 4; 5473 if (trySkipToken(AsmToken::Comma)) { 5474 SMLoc AlignLoc = getLoc(); 5475 if 
(getParser().parseAbsoluteExpression(Alignment)) 5476 return true; 5477 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5478 return Error(AlignLoc, "alignment must be a power of two"); 5479 5480 // Alignment larger than the size of LDS is possible in theory, as long 5481 // as the linker manages to place to symbol at address 0, but we do want 5482 // to make sure the alignment fits nicely into a 32-bit integer. 5483 if (Alignment >= 1u << 31) 5484 return Error(AlignLoc, "alignment is too large"); 5485 } 5486 5487 if (parseToken(AsmToken::EndOfStatement, 5488 "unexpected token in '.amdgpu_lds' directive")) 5489 return true; 5490 5491 Symbol->redefineIfPossible(); 5492 if (!Symbol->isUndefined()) 5493 return Error(NameLoc, "invalid symbol redefinition"); 5494 5495 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5496 return false; 5497 } 5498 5499 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5500 StringRef IDVal = DirectiveID.getString(); 5501 5502 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5503 if (IDVal == ".amdhsa_kernel") 5504 return ParseDirectiveAMDHSAKernel(); 5505 5506 // TODO: Restructure/combine with PAL metadata directive. 5507 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5508 return ParseDirectiveHSAMetadata(); 5509 } else { 5510 if (IDVal == ".hsa_code_object_version") 5511 return ParseDirectiveHSACodeObjectVersion(); 5512 5513 if (IDVal == ".hsa_code_object_isa") 5514 return ParseDirectiveHSACodeObjectISA(); 5515 5516 if (IDVal == ".amd_kernel_code_t") 5517 return ParseDirectiveAMDKernelCodeT(); 5518 5519 if (IDVal == ".amdgpu_hsa_kernel") 5520 return ParseDirectiveAMDGPUHsaKernel(); 5521 5522 if (IDVal == ".amd_amdgpu_isa") 5523 return ParseDirectiveISAVersion(); 5524 5525 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5526 return ParseDirectiveHSAMetadata(); 5527 } 5528 5529 if (IDVal == ".amdgcn_target") 5530 return ParseDirectiveAMDGCNTarget(); 5531 5532 if (IDVal == ".amdgpu_lds") 5533 return ParseDirectiveAMDGPULDS(); 5534 5535 if (IDVal == PALMD::AssemblerDirectiveBegin) 5536 return ParseDirectivePALMetadataBegin(); 5537 5538 if (IDVal == PALMD::AssemblerDirective) 5539 return ParseDirectivePALMetadata(); 5540 5541 return true; 5542 } 5543 5544 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5545 unsigned RegNo) { 5546 5547 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5548 return isGFX9Plus(); 5549 5550 // GFX10 has 2 more SGPRs 104 and 105. 5551 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5552 return hasSGPR104_SGPR105(); 5553 5554 switch (RegNo) { 5555 case AMDGPU::SRC_SHARED_BASE: 5556 case AMDGPU::SRC_SHARED_LIMIT: 5557 case AMDGPU::SRC_PRIVATE_BASE: 5558 case AMDGPU::SRC_PRIVATE_LIMIT: 5559 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5560 return isGFX9Plus(); 5561 case AMDGPU::TBA: 5562 case AMDGPU::TBA_LO: 5563 case AMDGPU::TBA_HI: 5564 case AMDGPU::TMA: 5565 case AMDGPU::TMA_LO: 5566 case AMDGPU::TMA_HI: 5567 return !isGFX9Plus(); 5568 case AMDGPU::XNACK_MASK: 5569 case AMDGPU::XNACK_MASK_LO: 5570 case AMDGPU::XNACK_MASK_HI: 5571 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5572 case AMDGPU::SGPR_NULL: 5573 return isGFX10Plus(); 5574 default: 5575 break; 5576 } 5577 5578 if (isCI()) 5579 return true; 5580 5581 if (isSI() || isGFX10Plus()) { 5582 // No flat_scr on SI. 5583 // On GFX10 flat scratch is not a valid register operand and can only be 5584 // accessed with s_setreg/s_getreg. 
5585 switch (RegNo) { 5586 case AMDGPU::FLAT_SCR: 5587 case AMDGPU::FLAT_SCR_LO: 5588 case AMDGPU::FLAT_SCR_HI: 5589 return false; 5590 default: 5591 return true; 5592 } 5593 } 5594 5595 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5596 // SI/CI have. 5597 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5598 return hasSGPR102_SGPR103(); 5599 5600 return true; 5601 } 5602 5603 OperandMatchResultTy 5604 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5605 OperandMode Mode) { 5606 // Try to parse with a custom parser 5607 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5608 5609 // If we successfully parsed the operand or if there as an error parsing, 5610 // we are done. 5611 // 5612 // If we are parsing after we reach EndOfStatement then this means we 5613 // are appending default values to the Operands list. This is only done 5614 // by custom parser, so we shouldn't continue on to the generic parsing. 5615 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5616 isToken(AsmToken::EndOfStatement)) 5617 return ResTy; 5618 5619 SMLoc RBraceLoc; 5620 SMLoc LBraceLoc = getLoc(); 5621 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5622 unsigned Prefix = Operands.size(); 5623 5624 for (;;) { 5625 auto Loc = getLoc(); 5626 ResTy = parseReg(Operands); 5627 if (ResTy == MatchOperand_NoMatch) 5628 Error(Loc, "expected a register"); 5629 if (ResTy != MatchOperand_Success) 5630 return MatchOperand_ParseFail; 5631 5632 RBraceLoc = getLoc(); 5633 if (trySkipToken(AsmToken::RBrac)) 5634 break; 5635 5636 if (!skipToken(AsmToken::Comma, 5637 "expected a comma or a closing square bracket")) { 5638 return MatchOperand_ParseFail; 5639 } 5640 } 5641 5642 if (Operands.size() - Prefix > 1) { 5643 Operands.insert(Operands.begin() + Prefix, 5644 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5645 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5646 } 5647 5648 return MatchOperand_Success; 5649 } 5650 5651 return parseRegOrImm(Operands); 5652 } 5653 5654 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5655 // Clear any forced encodings from the previous instruction. 5656 setForcedEncodingSize(0); 5657 setForcedDPP(false); 5658 setForcedSDWA(false); 5659 5660 if (Name.endswith("_e64")) { 5661 setForcedEncodingSize(64); 5662 return Name.substr(0, Name.size() - 4); 5663 } else if (Name.endswith("_e32")) { 5664 setForcedEncodingSize(32); 5665 return Name.substr(0, Name.size() - 4); 5666 } else if (Name.endswith("_dpp")) { 5667 setForcedDPP(true); 5668 return Name.substr(0, Name.size() - 4); 5669 } else if (Name.endswith("_sdwa")) { 5670 setForcedSDWA(true); 5671 return Name.substr(0, Name.size() - 5); 5672 } 5673 return Name; 5674 } 5675 5676 static void applyMnemonicAliases(StringRef &Mnemonic, 5677 const FeatureBitset &Features, 5678 unsigned VariantID); 5679 5680 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5681 StringRef Name, 5682 SMLoc NameLoc, OperandVector &Operands) { 5683 // Add the instruction mnemonic 5684 Name = parseMnemonicSuffix(Name); 5685 5686 // If the target architecture uses MnemonicAlias, call it here to parse 5687 // operands correctly. 
5688 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5689 5690 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5691 5692 bool IsMIMG = Name.startswith("image_"); 5693 5694 while (!trySkipToken(AsmToken::EndOfStatement)) { 5695 OperandMode Mode = OperandMode_Default; 5696 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5697 Mode = OperandMode_NSA; 5698 CPolSeen = 0; 5699 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5700 5701 if (Res != MatchOperand_Success) { 5702 checkUnsupportedInstruction(Name, NameLoc); 5703 if (!Parser.hasPendingError()) { 5704 // FIXME: use real operand location rather than the current location. 5705 StringRef Msg = 5706 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5707 "not a valid operand."; 5708 Error(getLoc(), Msg); 5709 } 5710 while (!trySkipToken(AsmToken::EndOfStatement)) { 5711 lex(); 5712 } 5713 return true; 5714 } 5715 5716 // Eat the comma or space if there is one. 5717 trySkipToken(AsmToken::Comma); 5718 } 5719 5720 return false; 5721 } 5722 5723 //===----------------------------------------------------------------------===// 5724 // Utility functions 5725 //===----------------------------------------------------------------------===// 5726 5727 OperandMatchResultTy 5728 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5729 5730 if (!trySkipId(Prefix, AsmToken::Colon)) 5731 return MatchOperand_NoMatch; 5732 5733 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5734 } 5735 5736 OperandMatchResultTy 5737 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5738 AMDGPUOperand::ImmTy ImmTy, 5739 bool (*ConvertResult)(int64_t&)) { 5740 SMLoc S = getLoc(); 5741 int64_t Value = 0; 5742 5743 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5744 if (Res != MatchOperand_Success) 5745 return Res; 5746 5747 if (ConvertResult && !ConvertResult(Value)) { 5748 Error(S, "invalid " + StringRef(Prefix) + " value."); 5749 } 5750 5751 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5752 return MatchOperand_Success; 5753 } 5754 5755 OperandMatchResultTy 5756 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5757 OperandVector &Operands, 5758 AMDGPUOperand::ImmTy ImmTy, 5759 bool (*ConvertResult)(int64_t&)) { 5760 SMLoc S = getLoc(); 5761 if (!trySkipId(Prefix, AsmToken::Colon)) 5762 return MatchOperand_NoMatch; 5763 5764 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5765 return MatchOperand_ParseFail; 5766 5767 unsigned Val = 0; 5768 const unsigned MaxSize = 4; 5769 5770 // FIXME: How to verify the number of elements matches the number of src 5771 // operands? 
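// Illustrative inputs accepted here (prefix names assumed from the callers):
// op_sel:[0,1] or neg_lo:[1,0,0,1]; each element must be 0 or 1 and at most
// four elements are allowed.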
5772 for (int I = 0; ; ++I) { 5773 int64_t Op; 5774 SMLoc Loc = getLoc(); 5775 if (!parseExpr(Op)) 5776 return MatchOperand_ParseFail; 5777 5778 if (Op != 0 && Op != 1) { 5779 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5780 return MatchOperand_ParseFail; 5781 } 5782 5783 Val |= (Op << I); 5784 5785 if (trySkipToken(AsmToken::RBrac)) 5786 break; 5787 5788 if (I + 1 == MaxSize) { 5789 Error(getLoc(), "expected a closing square bracket"); 5790 return MatchOperand_ParseFail; 5791 } 5792 5793 if (!skipToken(AsmToken::Comma, "expected a comma")) 5794 return MatchOperand_ParseFail; 5795 } 5796 5797 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5798 return MatchOperand_Success; 5799 } 5800 5801 OperandMatchResultTy 5802 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5803 AMDGPUOperand::ImmTy ImmTy) { 5804 int64_t Bit; 5805 SMLoc S = getLoc(); 5806 5807 if (trySkipId(Name)) { 5808 Bit = 1; 5809 } else if (trySkipId("no", Name)) { 5810 Bit = 0; 5811 } else { 5812 return MatchOperand_NoMatch; 5813 } 5814 5815 if (Name == "r128" && !hasMIMG_R128()) { 5816 Error(S, "r128 modifier is not supported on this GPU"); 5817 return MatchOperand_ParseFail; 5818 } 5819 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5820 Error(S, "a16 modifier is not supported on this GPU"); 5821 return MatchOperand_ParseFail; 5822 } 5823 5824 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5825 ImmTy = AMDGPUOperand::ImmTyR128A16; 5826 5827 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5828 return MatchOperand_Success; 5829 } 5830 5831 OperandMatchResultTy 5832 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5833 unsigned CPolOn = 0; 5834 unsigned CPolOff = 0; 5835 SMLoc S = getLoc(); 5836 5837 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5838 if (isGFX940() && !Mnemo.startswith("s_")) { 5839 if (trySkipId("sc0")) 5840 CPolOn = AMDGPU::CPol::SC0; 5841 else if (trySkipId("nosc0")) 5842 CPolOff = AMDGPU::CPol::SC0; 5843 else if (trySkipId("nt")) 5844 CPolOn = AMDGPU::CPol::NT; 5845 else if (trySkipId("nont")) 5846 CPolOff = AMDGPU::CPol::NT; 5847 else if (trySkipId("sc1")) 5848 CPolOn = AMDGPU::CPol::SC1; 5849 else if (trySkipId("nosc1")) 5850 CPolOff = AMDGPU::CPol::SC1; 5851 else 5852 return MatchOperand_NoMatch; 5853 } 5854 else if (trySkipId("glc")) 5855 CPolOn = AMDGPU::CPol::GLC; 5856 else if (trySkipId("noglc")) 5857 CPolOff = AMDGPU::CPol::GLC; 5858 else if (trySkipId("slc")) 5859 CPolOn = AMDGPU::CPol::SLC; 5860 else if (trySkipId("noslc")) 5861 CPolOff = AMDGPU::CPol::SLC; 5862 else if (trySkipId("dlc")) 5863 CPolOn = AMDGPU::CPol::DLC; 5864 else if (trySkipId("nodlc")) 5865 CPolOff = AMDGPU::CPol::DLC; 5866 else if (trySkipId("scc")) 5867 CPolOn = AMDGPU::CPol::SCC; 5868 else if (trySkipId("noscc")) 5869 CPolOff = AMDGPU::CPol::SCC; 5870 else 5871 return MatchOperand_NoMatch; 5872 5873 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5874 Error(S, "dlc modifier is not supported on this GPU"); 5875 return MatchOperand_ParseFail; 5876 } 5877 5878 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5879 Error(S, "scc modifier is not supported on this GPU"); 5880 return MatchOperand_ParseFail; 5881 } 5882 5883 if (CPolSeen & (CPolOn | CPolOff)) { 5884 Error(S, "duplicate cache policy modifier"); 5885 return MatchOperand_ParseFail; 5886 } 5887 5888 CPolSeen |= (CPolOn | CPolOff); 5889 5890 for (unsigned I = 1; I != Operands.size(); ++I) { 5891 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5892 if (Op.isCPol()) { 5893 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5894 return MatchOperand_Success; 5895 } 5896 } 5897 5898 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5899 AMDGPUOperand::ImmTyCPol)); 5900 5901 return MatchOperand_Success; 5902 } 5903 5904 static void addOptionalImmOperand( 5905 MCInst& Inst, const OperandVector& Operands, 5906 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5907 AMDGPUOperand::ImmTy ImmT, 5908 int64_t Default = 0) { 5909 auto i = OptionalIdx.find(ImmT); 5910 if (i != OptionalIdx.end()) { 5911 unsigned Idx = i->second; 5912 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5913 } else { 5914 Inst.addOperand(MCOperand::createImm(Default)); 5915 } 5916 } 5917 5918 OperandMatchResultTy 5919 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5920 StringRef &Value, 5921 SMLoc &StringLoc) { 5922 if (!trySkipId(Prefix, AsmToken::Colon)) 5923 return MatchOperand_NoMatch; 5924 5925 StringLoc = getLoc(); 5926 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5927 : MatchOperand_ParseFail; 5928 } 5929 5930 //===----------------------------------------------------------------------===// 5931 // MTBUF format 5932 //===----------------------------------------------------------------------===// 5933 5934 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5935 int64_t MaxVal, 5936 int64_t &Fmt) { 5937 int64_t Val; 5938 SMLoc Loc = getLoc(); 5939 5940 auto Res = parseIntWithPrefix(Pref, Val); 5941 if (Res == MatchOperand_ParseFail) 5942 return false; 5943 if (Res == MatchOperand_NoMatch) 5944 return true; 5945 5946 if (Val < 0 || Val > MaxVal) { 5947 Error(Loc, Twine("out of range ", StringRef(Pref))); 5948 return false; 5949 } 5950 5951 Fmt = Val; 5952 return true; 5953 } 5954 5955 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5956 // values to live in a joint format operand in the MCInst encoding. 5957 OperandMatchResultTy 5958 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5959 using namespace llvm::AMDGPU::MTBUFFormat; 5960 5961 int64_t Dfmt = DFMT_UNDEF; 5962 int64_t Nfmt = NFMT_UNDEF; 5963 5964 // dfmt and nfmt can appear in either order, and each is optional. 5965 for (int I = 0; I < 2; ++I) { 5966 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5967 return MatchOperand_ParseFail; 5968 5969 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5970 return MatchOperand_ParseFail; 5971 } 5972 // Skip optional comma between dfmt/nfmt 5973 // but guard against 2 commas following each other. 5974 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5975 !peekToken().is(AsmToken::Comma)) { 5976 trySkipToken(AsmToken::Comma); 5977 } 5978 } 5979 5980 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5981 return MatchOperand_NoMatch; 5982 5983 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5984 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5985 5986 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5987 return MatchOperand_Success; 5988 } 5989 5990 OperandMatchResultTy 5991 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5992 using namespace llvm::AMDGPU::MTBUFFormat; 5993 5994 int64_t Fmt = UFMT_UNDEF; 5995 5996 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5997 return MatchOperand_ParseFail; 5998 5999 if (Fmt == UFMT_UNDEF) 6000 return MatchOperand_NoMatch; 6001 6002 Format = Fmt; 6003 return MatchOperand_Success; 6004 } 6005 6006 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6007 int64_t &Nfmt, 6008 StringRef FormatStr, 6009 SMLoc Loc) { 6010 using namespace llvm::AMDGPU::MTBUFFormat; 6011 int64_t Format; 6012 6013 Format = getDfmt(FormatStr); 6014 if (Format != DFMT_UNDEF) { 6015 Dfmt = Format; 6016 return true; 6017 } 6018 6019 Format = getNfmt(FormatStr, getSTI()); 6020 if (Format != NFMT_UNDEF) { 6021 Nfmt = Format; 6022 return true; 6023 } 6024 6025 Error(Loc, "unsupported format"); 6026 return false; 6027 } 6028 6029 OperandMatchResultTy 6030 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6031 SMLoc FormatLoc, 6032 int64_t &Format) { 6033 using namespace llvm::AMDGPU::MTBUFFormat; 6034 6035 int64_t Dfmt = DFMT_UNDEF; 6036 int64_t Nfmt = NFMT_UNDEF; 6037 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6038 return MatchOperand_ParseFail; 6039 6040 if (trySkipToken(AsmToken::Comma)) { 6041 StringRef Str; 6042 SMLoc Loc = getLoc(); 6043 if (!parseId(Str, "expected a format string") || 6044 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6045 return MatchOperand_ParseFail; 6046 } 6047 if (Dfmt == DFMT_UNDEF) { 6048 Error(Loc, "duplicate numeric format"); 6049 return MatchOperand_ParseFail; 6050 } else if (Nfmt == NFMT_UNDEF) { 6051 Error(Loc, "duplicate data format"); 6052 return MatchOperand_ParseFail; 6053 } 6054 } 6055 6056 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6057 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6058 6059 if (isGFX10Plus()) { 6060 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6061 if (Ufmt == UFMT_UNDEF) { 6062 Error(FormatLoc, "unsupported format"); 6063 return MatchOperand_ParseFail; 6064 } 6065 Format = Ufmt; 6066 } else { 6067 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6068 } 6069 6070 return MatchOperand_Success; 6071 } 6072 6073 OperandMatchResultTy 6074 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6075 SMLoc Loc, 6076 int64_t &Format) { 6077 using namespace llvm::AMDGPU::MTBUFFormat; 6078 6079 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6080 if (Id == UFMT_UNDEF) 6081 return MatchOperand_NoMatch; 6082 6083 if (!isGFX10Plus()) { 6084 Error(Loc, "unified format is not supported on this GPU"); 6085 return MatchOperand_ParseFail; 6086 } 6087 6088 Format = Id; 6089 return MatchOperand_Success; 6090 } 6091 6092 OperandMatchResultTy 6093 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6094 using namespace llvm::AMDGPU::MTBUFFormat; 6095 SMLoc Loc = getLoc(); 6096 6097 if (!parseExpr(Format)) 6098 return MatchOperand_ParseFail; 6099 if (!isValidFormatEncoding(Format, getSTI())) { 6100 Error(Loc, "out of range format"); 6101 return MatchOperand_ParseFail; 6102 } 6103 6104 return MatchOperand_Success; 6105 } 6106 6107 OperandMatchResultTy 6108 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6109 using namespace llvm::AMDGPU::MTBUFFormat; 6110 6111 if (!trySkipId("format", AsmToken::Colon)) 6112 return MatchOperand_NoMatch; 6113 6114 if (trySkipToken(AsmToken::LBrac)) { 6115 StringRef FormatStr; 6116 SMLoc Loc = getLoc(); 6117 if (!parseId(FormatStr, "expected a format string")) 6118 return MatchOperand_ParseFail; 6119 6120 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6121 if (Res == MatchOperand_NoMatch) 6122 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6123 if (Res != MatchOperand_Success) 6124 return Res; 6125 6126 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6127 return MatchOperand_ParseFail; 6128 6129 return MatchOperand_Success; 6130 } 6131 6132 return parseNumericFormat(Format); 6133 } 6134 6135 OperandMatchResultTy 6136 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6137 using namespace llvm::AMDGPU::MTBUFFormat; 6138 6139 int64_t Format = getDefaultFormatEncoding(getSTI()); 6140 OperandMatchResultTy Res; 6141 SMLoc Loc = getLoc(); 6142 6143 // Parse legacy format syntax. 6144 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6145 if (Res == MatchOperand_ParseFail) 6146 return Res; 6147 6148 bool FormatFound = (Res == MatchOperand_Success); 6149 6150 Operands.push_back( 6151 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6152 6153 if (FormatFound) 6154 trySkipToken(AsmToken::Comma); 6155 6156 if (isToken(AsmToken::EndOfStatement)) { 6157 // We are expecting an soffset operand, 6158 // but let matcher handle the error. 6159 return MatchOperand_Success; 6160 } 6161 6162 // Parse soffset. 
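// If no format was given before soffset, a trailing one may follow it,
// e.g. (illustrative) "format:[BUF_FMT_32_FLOAT]"; that case is handled
// below by patching the FORMAT operand created above.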
6163 Res = parseRegOrImm(Operands); 6164 if (Res != MatchOperand_Success) 6165 return Res; 6166 6167 trySkipToken(AsmToken::Comma); 6168 6169 if (!FormatFound) { 6170 Res = parseSymbolicOrNumericFormat(Format); 6171 if (Res == MatchOperand_ParseFail) 6172 return Res; 6173 if (Res == MatchOperand_Success) { 6174 auto Size = Operands.size(); 6175 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6176 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6177 Op.setImm(Format); 6178 } 6179 return MatchOperand_Success; 6180 } 6181 6182 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6183 Error(getLoc(), "duplicate format"); 6184 return MatchOperand_ParseFail; 6185 } 6186 return MatchOperand_Success; 6187 } 6188 6189 //===----------------------------------------------------------------------===// 6190 // ds 6191 //===----------------------------------------------------------------------===// 6192 6193 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6194 const OperandVector &Operands) { 6195 OptionalImmIndexMap OptionalIdx; 6196 6197 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6198 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6199 6200 // Add the register arguments 6201 if (Op.isReg()) { 6202 Op.addRegOperands(Inst, 1); 6203 continue; 6204 } 6205 6206 // Handle optional arguments 6207 OptionalIdx[Op.getImmTy()] = i; 6208 } 6209 6210 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6211 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6212 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6213 6214 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6215 } 6216 6217 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6218 bool IsGdsHardcoded) { 6219 OptionalImmIndexMap OptionalIdx; 6220 6221 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6222 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6223 6224 // Add the register arguments 6225 if (Op.isReg()) { 6226 Op.addRegOperands(Inst, 1); 6227 continue; 6228 } 6229 6230 if (Op.isToken() && Op.getToken() == "gds") { 6231 IsGdsHardcoded = true; 6232 continue; 6233 } 6234 6235 // Handle optional arguments 6236 OptionalIdx[Op.getImmTy()] = i; 6237 } 6238 6239 AMDGPUOperand::ImmTy OffsetType = 6240 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6241 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6242 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6243 AMDGPUOperand::ImmTyOffset; 6244 6245 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6246 6247 if (!IsGdsHardcoded) { 6248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6249 } 6250 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6251 } 6252 6253 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6254 OptionalImmIndexMap OptionalIdx; 6255 6256 unsigned OperandIdx[4]; 6257 unsigned EnMask = 0; 6258 int SrcIdx = 0; 6259 6260 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6261 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6262 6263 // Add the register arguments 6264 if (Op.isReg()) { 6265 assert(SrcIdx < 4); 6266 OperandIdx[SrcIdx] = Inst.size(); 6267 Op.addRegOperands(Inst, 1); 6268 ++SrcIdx; 6269 continue; 6270 } 6271 6272 if (Op.isOff()) { 6273 assert(SrcIdx < 4); 6274 OperandIdx[SrcIdx] = Inst.size(); 6275 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6276 ++SrcIdx; 6277 continue; 6278 } 6279 6280 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6281 Op.addImmOperands(Inst, 1); 6282 continue; 6283 } 6284 6285 if (Op.isToken() && Op.getToken() == "done") 6286 continue; 6287 6288 // Handle optional arguments 6289 OptionalIdx[Op.getImmTy()] = i; 6290 } 6291 6292 assert(SrcIdx == 4); 6293 6294 bool Compr = false; 6295 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6296 Compr = true; 6297 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6298 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6299 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6300 } 6301 6302 for (auto i = 0; i < SrcIdx; ++i) { 6303 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6304 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6305 } 6306 } 6307 6308 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6309 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6310 6311 Inst.addOperand(MCOperand::createImm(EnMask)); 6312 } 6313 6314 //===----------------------------------------------------------------------===// 6315 // s_waitcnt 6316 //===----------------------------------------------------------------------===// 6317 6318 static bool 6319 encodeCnt( 6320 const AMDGPU::IsaVersion ISA, 6321 int64_t &IntVal, 6322 int64_t CntVal, 6323 bool Saturate, 6324 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6325 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6326 { 6327 bool Failed = false; 6328 6329 IntVal = encode(ISA, IntVal, CntVal); 6330 if (CntVal != decode(ISA, IntVal)) { 6331 if (Saturate) { 6332 IntVal = encode(ISA, IntVal, -1); 6333 } else { 6334 Failed = true; 6335 } 6336 } 6337 return Failed; 6338 } 6339 6340 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6341 6342 SMLoc CntLoc = getLoc(); 6343 StringRef CntName = getTokenStr(); 6344 6345 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6346 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6347 return false; 6348 6349 int64_t CntVal; 6350 SMLoc ValLoc = getLoc(); 6351 if (!parseExpr(CntVal)) 6352 return false; 6353 6354 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6355 6356 bool Failed = true; 6357 bool Sat = CntName.endswith("_sat"); 6358 6359 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6360 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6361 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6362 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6363 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6364 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6365 } else { 6366 Error(CntLoc, "invalid counter name " + CntName); 6367 return false; 6368 } 6369 6370 if (Failed) { 6371 Error(ValLoc, "too large value for " + CntName); 6372 return false; 6373 } 6374 6375 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6376 return false; 6377 6378 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6379 if (isToken(AsmToken::EndOfStatement)) { 6380 Error(getLoc(), "expected a counter name"); 6381 return false; 6382 } 6383 } 6384 6385 return true; 6386 } 6387 6388 OperandMatchResultTy 6389 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6390 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6391 int64_t Waitcnt = getWaitcntBitMask(ISA); 6392 SMLoc S = getLoc(); 6393 6394 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6395 while (!isToken(AsmToken::EndOfStatement)) { 6396 if (!parseCnt(Waitcnt)) 6397 return MatchOperand_ParseFail; 6398 } 6399 } else { 6400 if (!parseExpr(Waitcnt)) 6401 return MatchOperand_ParseFail; 6402 } 6403 6404 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6405 return MatchOperand_Success; 6406 } 6407 6408 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6409 SMLoc FieldLoc = getLoc(); 6410 StringRef FieldName = getTokenStr(); 6411 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6412 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6413 return false; 6414 6415 SMLoc ValueLoc = getLoc(); 6416 StringRef ValueName = getTokenStr(); 6417 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6418 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6419 return false; 6420 6421 unsigned Shift; 6422 if (FieldName == "instid0") { 6423 Shift = 0; 6424 } else if (FieldName == "instskip") { 6425 Shift = 4; 6426 } else if (FieldName == "instid1") { 6427 Shift = 7; 6428 } else { 6429 Error(FieldLoc, "invalid field name " + FieldName); 6430 return false; 6431 } 6432 6433 int Value; 6434 if (Shift == 4) { 6435 // Parse values for instskip. 6436 Value = StringSwitch<int>(ValueName) 6437 .Case("SAME", 0) 6438 .Case("NEXT", 1) 6439 .Case("SKIP_1", 2) 6440 .Case("SKIP_2", 3) 6441 .Case("SKIP_3", 4) 6442 .Case("SKIP_4", 5) 6443 .Default(-1); 6444 } else { 6445 // Parse values for instid0 and instid1. 6446 Value = StringSwitch<int>(ValueName) 6447 .Case("NO_DEP", 0) 6448 .Case("VALU_DEP_1", 1) 6449 .Case("VALU_DEP_2", 2) 6450 .Case("VALU_DEP_3", 3) 6451 .Case("VALU_DEP_4", 4) 6452 .Case("TRANS32_DEP_1", 5) 6453 .Case("TRANS32_DEP_2", 6) 6454 .Case("TRANS32_DEP_3", 7) 6455 .Case("FMA_ACCUM_CYCLE_1", 8) 6456 .Case("SALU_CYCLE_1", 9) 6457 .Case("SALU_CYCLE_2", 10) 6458 .Case("SALU_CYCLE_3", 11) 6459 .Default(-1); 6460 } 6461 if (Value < 0) { 6462 Error(ValueLoc, "invalid value name " + ValueName); 6463 return false; 6464 } 6465 6466 Delay |= Value << Shift; 6467 return true; 6468 } 6469 6470 OperandMatchResultTy 6471 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6472 int64_t Delay = 0; 6473 SMLoc S = getLoc(); 6474 6475 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6476 do { 6477 if (!parseDelay(Delay)) 6478 return MatchOperand_ParseFail; 6479 } while (trySkipToken(AsmToken::Pipe)); 6480 } else { 6481 if (!parseExpr(Delay)) 6482 return MatchOperand_ParseFail; 6483 } 6484 6485 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6486 return MatchOperand_Success; 6487 } 6488 6489 bool 6490 AMDGPUOperand::isSWaitCnt() const { 6491 return isImm(); 6492 } 6493 6494 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6495 6496 //===----------------------------------------------------------------------===// 6497 // DepCtr 6498 //===----------------------------------------------------------------------===// 6499 6500 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6501 StringRef DepCtrName) { 6502 switch (ErrorId) { 6503 case OPR_ID_UNKNOWN: 6504 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6505 return; 6506 case OPR_ID_UNSUPPORTED: 6507 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6508 return; 6509 case OPR_ID_DUPLICATE: 6510 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6511 return; 6512 case OPR_VAL_INVALID: 6513 Error(Loc, Twine("invalid value for ", DepCtrName)); 6514 return; 6515 default: 6516 assert(false); 6517 } 6518 } 6519 6520 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6521 6522 using namespace llvm::AMDGPU::DepCtr; 6523 6524 SMLoc DepCtrLoc = getLoc(); 6525 StringRef DepCtrName = getTokenStr(); 6526 6527 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6528 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6529 return false; 6530 6531 int64_t ExprVal; 6532 if (!parseExpr(ExprVal)) 6533 return false; 6534 6535 unsigned PrevOprMask = UsedOprMask; 6536 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6537 6538 if (CntVal < 0) { 6539 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6540 return false; 6541 } 6542 6543 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6544 return false; 6545 6546 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6547 if (isToken(AsmToken::EndOfStatement)) { 6548 Error(getLoc(), "expected a counter name"); 6549 return false; 6550 } 6551 } 6552 6553 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6554 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6555 return true; 6556 } 6557 6558 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6559 using namespace llvm::AMDGPU::DepCtr; 6560 6561 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6562 SMLoc Loc = getLoc(); 6563 6564 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6565 unsigned UsedOprMask = 0; 6566 while (!isToken(AsmToken::EndOfStatement)) { 6567 if (!parseDepCtr(DepCtr, UsedOprMask)) 6568 return MatchOperand_ParseFail; 6569 } 6570 } else { 6571 if (!parseExpr(DepCtr)) 6572 return MatchOperand_ParseFail; 6573 } 6574 6575 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6576 return MatchOperand_Success; 6577 } 6578 6579 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6580 6581 //===----------------------------------------------------------------------===// 6582 // hwreg 6583 //===----------------------------------------------------------------------===// 6584 6585 bool 6586 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6587 OperandInfoTy &Offset, 6588 OperandInfoTy &Width) { 6589 using namespace llvm::AMDGPU::Hwreg; 6590 6591 // The register may be specified by name or using a numeric code 6592 HwReg.Loc = getLoc(); 6593 if (isToken(AsmToken::Identifier) && 6594 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6595 HwReg.IsSymbolic = true; 6596 lex(); // skip register name 6597 } else if (!parseExpr(HwReg.Id, "a register name")) { 6598 return false; 6599 } 6600 6601 if (trySkipToken(AsmToken::RParen)) 6602 return true; 6603 6604 // parse optional params 6605 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6606 return false; 6607 6608 Offset.Loc = getLoc(); 6609 if (!parseExpr(Offset.Id)) 6610 return false; 6611 6612 if (!skipToken(AsmToken::Comma, "expected a comma")) 6613 return false; 6614 6615 Width.Loc = getLoc(); 6616 return parseExpr(Width.Id) && 6617 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6618 } 6619 6620 bool 6621 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6622 const OperandInfoTy &Offset, 6623 const OperandInfoTy &Width) { 6624 6625 using namespace llvm::AMDGPU::Hwreg; 6626 6627 if (HwReg.IsSymbolic) { 6628 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6629 Error(HwReg.Loc, 6630 "specified hardware register is not supported on this GPU"); 6631 return false; 6632 } 6633 } else { 6634 if (!isValidHwreg(HwReg.Id)) { 6635 Error(HwReg.Loc, 6636 "invalid code of hardware register: only 6-bit values are legal"); 6637 return false; 6638 } 6639 } 6640 if (!isValidHwregOffset(Offset.Id)) { 6641 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6642 return false; 6643 } 6644 if (!isValidHwregWidth(Width.Id)) { 6645 Error(Width.Loc, 6646 "invalid bitfield width: only values from 1 to 32 are legal"); 6647 return false; 6648 } 6649 return true; 6650 } 6651 6652 OperandMatchResultTy 6653 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6654 using namespace llvm::AMDGPU::Hwreg; 6655 6656 int64_t ImmVal = 0; 6657 SMLoc Loc = getLoc(); 6658 6659 if (trySkipId("hwreg", AsmToken::LParen)) { 6660 
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6661 OperandInfoTy Offset(OFFSET_DEFAULT_); 6662 OperandInfoTy Width(WIDTH_DEFAULT_); 6663 if (parseHwregBody(HwReg, Offset, Width) && 6664 validateHwreg(HwReg, Offset, Width)) { 6665 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6666 } else { 6667 return MatchOperand_ParseFail; 6668 } 6669 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6670 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6671 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6672 return MatchOperand_ParseFail; 6673 } 6674 } else { 6675 return MatchOperand_ParseFail; 6676 } 6677 6678 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6679 return MatchOperand_Success; 6680 } 6681 6682 bool AMDGPUOperand::isHwreg() const { 6683 return isImmTy(ImmTyHwreg); 6684 } 6685 6686 //===----------------------------------------------------------------------===// 6687 // sendmsg 6688 //===----------------------------------------------------------------------===// 6689 6690 bool 6691 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6692 OperandInfoTy &Op, 6693 OperandInfoTy &Stream) { 6694 using namespace llvm::AMDGPU::SendMsg; 6695 6696 Msg.Loc = getLoc(); 6697 if (isToken(AsmToken::Identifier) && 6698 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6699 Msg.IsSymbolic = true; 6700 lex(); // skip message name 6701 } else if (!parseExpr(Msg.Id, "a message name")) { 6702 return false; 6703 } 6704 6705 if (trySkipToken(AsmToken::Comma)) { 6706 Op.IsDefined = true; 6707 Op.Loc = getLoc(); 6708 if (isToken(AsmToken::Identifier) && 6709 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6710 lex(); // skip operation name 6711 } else if (!parseExpr(Op.Id, "an operation name")) { 6712 return false; 6713 } 6714 6715 if (trySkipToken(AsmToken::Comma)) { 6716 Stream.IsDefined = true; 6717 Stream.Loc = getLoc(); 6718 if (!parseExpr(Stream.Id)) 6719 return false; 6720 } 6721 } 6722 6723 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6724 } 6725 6726 bool 6727 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6728 const OperandInfoTy &Op, 6729 const OperandInfoTy &Stream) { 6730 using namespace llvm::AMDGPU::SendMsg; 6731 6732 // Validation strictness depends on whether message is specified 6733 // in a symbolic or in a numeric form. In the latter case 6734 // only encoding possibility is checked. 
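// E.g. (illustrative) sendmsg(MSG_GS_DONE, GS_OP_NOP) is checked against the
// symbolic tables, while a numeric form such as sendmsg(3) only has to fit
// the encoding.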
6735 bool Strict = Msg.IsSymbolic; 6736 6737 if (Strict) { 6738 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6739 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6740 return false; 6741 } 6742 } else { 6743 if (!isValidMsgId(Msg.Id, getSTI())) { 6744 Error(Msg.Loc, "invalid message id"); 6745 return false; 6746 } 6747 } 6748 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6749 if (Op.IsDefined) { 6750 Error(Op.Loc, "message does not support operations"); 6751 } else { 6752 Error(Msg.Loc, "missing message operation"); 6753 } 6754 return false; 6755 } 6756 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6757 Error(Op.Loc, "invalid operation id"); 6758 return false; 6759 } 6760 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6761 Stream.IsDefined) { 6762 Error(Stream.Loc, "message operation does not support streams"); 6763 return false; 6764 } 6765 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6766 Error(Stream.Loc, "invalid message stream id"); 6767 return false; 6768 } 6769 return true; 6770 } 6771 6772 OperandMatchResultTy 6773 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6774 using namespace llvm::AMDGPU::SendMsg; 6775 6776 int64_t ImmVal = 0; 6777 SMLoc Loc = getLoc(); 6778 6779 if (trySkipId("sendmsg", AsmToken::LParen)) { 6780 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6781 OperandInfoTy Op(OP_NONE_); 6782 OperandInfoTy Stream(STREAM_ID_NONE_); 6783 if (parseSendMsgBody(Msg, Op, Stream) && 6784 validateSendMsg(Msg, Op, Stream)) { 6785 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6786 } else { 6787 return MatchOperand_ParseFail; 6788 } 6789 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6790 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6791 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6792 return MatchOperand_ParseFail; 6793 } 6794 } else { 6795 return MatchOperand_ParseFail; 6796 } 6797 6798 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6799 return MatchOperand_Success; 6800 } 6801 6802 bool AMDGPUOperand::isSendMsg() const { 6803 return isImmTy(ImmTySendMsg); 6804 } 6805 6806 //===----------------------------------------------------------------------===// 6807 // v_interp 6808 //===----------------------------------------------------------------------===// 6809 6810 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6811 StringRef Str; 6812 SMLoc S = getLoc(); 6813 6814 if (!parseId(Str)) 6815 return MatchOperand_NoMatch; 6816 6817 int Slot = StringSwitch<int>(Str) 6818 .Case("p10", 0) 6819 .Case("p20", 1) 6820 .Case("p0", 2) 6821 .Default(-1); 6822 6823 if (Slot == -1) { 6824 Error(S, "invalid interpolation slot"); 6825 return MatchOperand_ParseFail; 6826 } 6827 6828 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6829 AMDGPUOperand::ImmTyInterpSlot)); 6830 return MatchOperand_Success; 6831 } 6832 6833 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6834 StringRef Str; 6835 SMLoc S = getLoc(); 6836 6837 if (!parseId(Str)) 6838 return MatchOperand_NoMatch; 6839 6840 if (!Str.startswith("attr")) { 6841 Error(S, "invalid interpolation attribute"); 6842 return MatchOperand_ParseFail; 6843 } 6844 6845 StringRef Chan = Str.take_back(2); 6846 int AttrChan = StringSwitch<int>(Chan) 6847 .Case(".x", 0) 6848 .Case(".y", 1) 6849 .Case(".z", 2) 6850 .Case(".w", 3) 6851 .Default(-1); 6852 if (AttrChan == -1) { 6853 Error(S, "invalid or missing interpolation attribute channel"); 
6854 return MatchOperand_ParseFail; 6855 } 6856 6857 Str = Str.drop_back(2).drop_front(4); 6858 6859 uint8_t Attr; 6860 if (Str.getAsInteger(10, Attr)) { 6861 Error(S, "invalid or missing interpolation attribute number"); 6862 return MatchOperand_ParseFail; 6863 } 6864 6865 if (Attr > 63) { 6866 Error(S, "out of bounds interpolation attribute number"); 6867 return MatchOperand_ParseFail; 6868 } 6869 6870 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6871 6872 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6873 AMDGPUOperand::ImmTyInterpAttr)); 6874 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6875 AMDGPUOperand::ImmTyAttrChan)); 6876 return MatchOperand_Success; 6877 } 6878 6879 //===----------------------------------------------------------------------===// 6880 // exp 6881 //===----------------------------------------------------------------------===// 6882 6883 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6884 using namespace llvm::AMDGPU::Exp; 6885 6886 StringRef Str; 6887 SMLoc S = getLoc(); 6888 6889 if (!parseId(Str)) 6890 return MatchOperand_NoMatch; 6891 6892 unsigned Id = getTgtId(Str); 6893 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6894 Error(S, (Id == ET_INVALID) ? 6895 "invalid exp target" : 6896 "exp target is not supported on this GPU"); 6897 return MatchOperand_ParseFail; 6898 } 6899 6900 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6901 AMDGPUOperand::ImmTyExpTgt)); 6902 return MatchOperand_Success; 6903 } 6904 6905 //===----------------------------------------------------------------------===// 6906 // parser helpers 6907 //===----------------------------------------------------------------------===// 6908 6909 bool 6910 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6911 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6912 } 6913 6914 bool 6915 AMDGPUAsmParser::isId(const StringRef Id) const { 6916 return isId(getToken(), Id); 6917 } 6918 6919 bool 6920 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6921 return getTokenKind() == Kind; 6922 } 6923 6924 bool 6925 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6926 if (isId(Id)) { 6927 lex(); 6928 return true; 6929 } 6930 return false; 6931 } 6932 6933 bool 6934 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6935 if (isToken(AsmToken::Identifier)) { 6936 StringRef Tok = getTokenStr(); 6937 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6938 lex(); 6939 return true; 6940 } 6941 } 6942 return false; 6943 } 6944 6945 bool 6946 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6947 if (isId(Id) && peekToken().is(Kind)) { 6948 lex(); 6949 lex(); 6950 return true; 6951 } 6952 return false; 6953 } 6954 6955 bool 6956 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6957 if (isToken(Kind)) { 6958 lex(); 6959 return true; 6960 } 6961 return false; 6962 } 6963 6964 bool 6965 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6966 const StringRef ErrMsg) { 6967 if (!trySkipToken(Kind)) { 6968 Error(getLoc(), ErrMsg); 6969 return false; 6970 } 6971 return true; 6972 } 6973 6974 bool 6975 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6976 SMLoc S = getLoc(); 6977 6978 const MCExpr *Expr; 6979 if (Parser.parseExpression(Expr)) 6980 return false; 6981 6982 if (Expr->evaluateAsAbsolute(Imm)) 6983 return true; 6984 6985 if (Expected.empty()) { 6986 Error(S, "expected 
absolute expression"); 6987 } else { 6988 Error(S, Twine("expected ", Expected) + 6989 Twine(" or an absolute expression")); 6990 } 6991 return false; 6992 } 6993 6994 bool 6995 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6996 SMLoc S = getLoc(); 6997 6998 const MCExpr *Expr; 6999 if (Parser.parseExpression(Expr)) 7000 return false; 7001 7002 int64_t IntVal; 7003 if (Expr->evaluateAsAbsolute(IntVal)) { 7004 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 7005 } else { 7006 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7007 } 7008 return true; 7009 } 7010 7011 bool 7012 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7013 if (isToken(AsmToken::String)) { 7014 Val = getToken().getStringContents(); 7015 lex(); 7016 return true; 7017 } else { 7018 Error(getLoc(), ErrMsg); 7019 return false; 7020 } 7021 } 7022 7023 bool 7024 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7025 if (isToken(AsmToken::Identifier)) { 7026 Val = getTokenStr(); 7027 lex(); 7028 return true; 7029 } else { 7030 if (!ErrMsg.empty()) 7031 Error(getLoc(), ErrMsg); 7032 return false; 7033 } 7034 } 7035 7036 AsmToken 7037 AMDGPUAsmParser::getToken() const { 7038 return Parser.getTok(); 7039 } 7040 7041 AsmToken 7042 AMDGPUAsmParser::peekToken() { 7043 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7044 } 7045 7046 void 7047 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7048 auto TokCount = getLexer().peekTokens(Tokens); 7049 7050 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7051 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7052 } 7053 7054 AsmToken::TokenKind 7055 AMDGPUAsmParser::getTokenKind() const { 7056 return getLexer().getKind(); 7057 } 7058 7059 SMLoc 7060 AMDGPUAsmParser::getLoc() const { 7061 return getToken().getLoc(); 7062 } 7063 7064 StringRef 7065 AMDGPUAsmParser::getTokenStr() const { 7066 return getToken().getString(); 7067 } 7068 7069 void 7070 AMDGPUAsmParser::lex() { 7071 Parser.Lex(); 7072 } 7073 7074 SMLoc 7075 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7076 const OperandVector &Operands) const { 7077 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7078 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7079 if (Test(Op)) 7080 return Op.getStartLoc(); 7081 } 7082 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7083 } 7084 7085 SMLoc 7086 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7087 const OperandVector &Operands) const { 7088 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7089 return getOperandLoc(Test, Operands); 7090 } 7091 7092 SMLoc 7093 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7094 const OperandVector &Operands) const { 7095 auto Test = [=](const AMDGPUOperand& Op) { 7096 return Op.isRegKind() && Op.getReg() == Reg; 7097 }; 7098 return getOperandLoc(Test, Operands); 7099 } 7100 7101 SMLoc 7102 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7103 auto Test = [](const AMDGPUOperand& Op) { 7104 return Op.IsImmKindLiteral() || Op.isExpr(); 7105 }; 7106 return getOperandLoc(Test, Operands); 7107 } 7108 7109 SMLoc 7110 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7111 auto Test = [](const AMDGPUOperand& Op) { 7112 return Op.isImmKindConst(); 7113 }; 7114 return getOperandLoc(Test, Operands); 7115 } 7116 7117 //===----------------------------------------------------------------------===// 7118 // swizzle 7119 
//===----------------------------------------------------------------------===// 7120 7121 LLVM_READNONE 7122 static unsigned 7123 encodeBitmaskPerm(const unsigned AndMask, 7124 const unsigned OrMask, 7125 const unsigned XorMask) { 7126 using namespace llvm::AMDGPU::Swizzle; 7127 7128 return BITMASK_PERM_ENC | 7129 (AndMask << BITMASK_AND_SHIFT) | 7130 (OrMask << BITMASK_OR_SHIFT) | 7131 (XorMask << BITMASK_XOR_SHIFT); 7132 } 7133 7134 bool 7135 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7136 const unsigned MinVal, 7137 const unsigned MaxVal, 7138 const StringRef ErrMsg, 7139 SMLoc &Loc) { 7140 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7141 return false; 7142 } 7143 Loc = getLoc(); 7144 if (!parseExpr(Op)) { 7145 return false; 7146 } 7147 if (Op < MinVal || Op > MaxVal) { 7148 Error(Loc, ErrMsg); 7149 return false; 7150 } 7151 7152 return true; 7153 } 7154 7155 bool 7156 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7157 const unsigned MinVal, 7158 const unsigned MaxVal, 7159 const StringRef ErrMsg) { 7160 SMLoc Loc; 7161 for (unsigned i = 0; i < OpNum; ++i) { 7162 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7163 return false; 7164 } 7165 7166 return true; 7167 } 7168 7169 bool 7170 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7171 using namespace llvm::AMDGPU::Swizzle; 7172 7173 int64_t Lane[LANE_NUM]; 7174 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7175 "expected a 2-bit lane id")) { 7176 Imm = QUAD_PERM_ENC; 7177 for (unsigned I = 0; I < LANE_NUM; ++I) { 7178 Imm |= Lane[I] << (LANE_SHIFT * I); 7179 } 7180 return true; 7181 } 7182 return false; 7183 } 7184 7185 bool 7186 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7187 using namespace llvm::AMDGPU::Swizzle; 7188 7189 SMLoc Loc; 7190 int64_t GroupSize; 7191 int64_t LaneIdx; 7192 7193 if (!parseSwizzleOperand(GroupSize, 7194 2, 32, 7195 "group size must be in the interval [2,32]", 7196 Loc)) { 7197 return false; 7198 } 7199 if (!isPowerOf2_64(GroupSize)) { 7200 Error(Loc, "group size must be a power of two"); 7201 return false; 7202 } 7203 if (parseSwizzleOperand(LaneIdx, 7204 0, GroupSize - 1, 7205 "lane id must be in the interval [0,group size - 1]", 7206 Loc)) { 7207 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7208 return true; 7209 } 7210 return false; 7211 } 7212 7213 bool 7214 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7215 using namespace llvm::AMDGPU::Swizzle; 7216 7217 SMLoc Loc; 7218 int64_t GroupSize; 7219 7220 if (!parseSwizzleOperand(GroupSize, 7221 2, 32, 7222 "group size must be in the interval [2,32]", 7223 Loc)) { 7224 return false; 7225 } 7226 if (!isPowerOf2_64(GroupSize)) { 7227 Error(Loc, "group size must be a power of two"); 7228 return false; 7229 } 7230 7231 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7232 return true; 7233 } 7234 7235 bool 7236 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7237 using namespace llvm::AMDGPU::Swizzle; 7238 7239 SMLoc Loc; 7240 int64_t GroupSize; 7241 7242 if (!parseSwizzleOperand(GroupSize, 7243 1, 16, 7244 "group size must be in the interval [1,16]", 7245 Loc)) { 7246 return false; 7247 } 7248 if (!isPowerOf2_64(GroupSize)) { 7249 Error(Loc, "group size must be a power of two"); 7250 return false; 7251 } 7252 7253 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7254 return true; 7255 } 7256 7257 bool 7258 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7259 using namespace llvm::AMDGPU::Swizzle; 7260 7261 if 
(!skipToken(AsmToken::Comma, "expected a comma")) {
7262 return false;
7263 }
7264
7265 StringRef Ctl;
7266 SMLoc StrLoc = getLoc();
7267 if (!parseString(Ctl)) {
7268 return false;
7269 }
7270 if (Ctl.size() != BITMASK_WIDTH) {
7271 Error(StrLoc, "expected a 5-character mask");
7272 return false;
7273 }
7274
7275 unsigned AndMask = 0;
7276 unsigned OrMask = 0;
7277 unsigned XorMask = 0;
7278
7279 for (size_t i = 0; i < Ctl.size(); ++i) {
7280 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7281 switch(Ctl[i]) {
7282 default:
7283 Error(StrLoc, "invalid mask");
7284 return false;
7285 case '0':
7286 break;
7287 case '1':
7288 OrMask |= Mask;
7289 break;
7290 case 'p':
7291 AndMask |= Mask;
7292 break;
7293 case 'i':
7294 AndMask |= Mask;
7295 XorMask |= Mask;
7296 break;
7297 }
7298 }
7299
7300 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7301 return true;
7302 }
7303
7304 bool
7305 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7306
7307 SMLoc OffsetLoc = getLoc();
7308
7309 if (!parseExpr(Imm, "a swizzle macro")) {
7310 return false;
7311 }
7312 if (!isUInt<16>(Imm)) {
7313 Error(OffsetLoc, "expected a 16-bit offset");
7314 return false;
7315 }
7316 return true;
7317 }
7318
7319 bool
7320 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7321 using namespace llvm::AMDGPU::Swizzle;
7322
7323 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7324
7325 SMLoc ModeLoc = getLoc();
7326 bool Ok = false;
7327
7328 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7329 Ok = parseSwizzleQuadPerm(Imm);
7330 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7331 Ok = parseSwizzleBitmaskPerm(Imm);
7332 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7333 Ok = parseSwizzleBroadcast(Imm);
7334 } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7335 Ok = parseSwizzleSwap(Imm);
7336 } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7337 Ok = parseSwizzleReverse(Imm);
7338 } else {
7339 Error(ModeLoc, "expected a swizzle mode");
7340 }
7341
7342 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7343 }
7344
7345 return false;
7346 }
7347
7348 OperandMatchResultTy
7349 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7350 SMLoc S = getLoc();
7351 int64_t Imm = 0;
7352
7353 if (trySkipId("offset")) {
7354
7355 bool Ok = false;
7356 if (skipToken(AsmToken::Colon, "expected a colon")) {
7357 if (trySkipId("swizzle")) {
7358 Ok = parseSwizzleMacro(Imm);
7359 } else {
7360 Ok = parseSwizzleOffset(Imm);
7361 }
7362 }
7363
7364 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7365
7366 return Ok? MatchOperand_Success : MatchOperand_ParseFail;
7367 } else {
7368 // Swizzle "offset" operand is optional.
7369 // If it is omitted, try parsing other optional operands.
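// For reference, illustrative (assumed, not exhaustive) forms that the
// swizzle parsing routines above are meant to accept; exact field ranges are
// enforced by the helpers:
//   ds_swizzle_b32 v0, v1 offset:0xFFFF
//   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v0, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v0, v1 offset:swizzle(BROADCAST, 8, 0)
//   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 4)
//   ds_swizzle_b32 v0, v1 offset:swizzle(REVERSE, 8)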
7370 return parseOptionalOpr(Operands); 7371 } 7372 } 7373 7374 bool 7375 AMDGPUOperand::isSwizzle() const { 7376 return isImmTy(ImmTySwizzle); 7377 } 7378 7379 //===----------------------------------------------------------------------===// 7380 // VGPR Index Mode 7381 //===----------------------------------------------------------------------===// 7382 7383 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7384 7385 using namespace llvm::AMDGPU::VGPRIndexMode; 7386 7387 if (trySkipToken(AsmToken::RParen)) { 7388 return OFF; 7389 } 7390 7391 int64_t Imm = 0; 7392 7393 while (true) { 7394 unsigned Mode = 0; 7395 SMLoc S = getLoc(); 7396 7397 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7398 if (trySkipId(IdSymbolic[ModeId])) { 7399 Mode = 1 << ModeId; 7400 break; 7401 } 7402 } 7403 7404 if (Mode == 0) { 7405 Error(S, (Imm == 0)? 7406 "expected a VGPR index mode or a closing parenthesis" : 7407 "expected a VGPR index mode"); 7408 return UNDEF; 7409 } 7410 7411 if (Imm & Mode) { 7412 Error(S, "duplicate VGPR index mode"); 7413 return UNDEF; 7414 } 7415 Imm |= Mode; 7416 7417 if (trySkipToken(AsmToken::RParen)) 7418 break; 7419 if (!skipToken(AsmToken::Comma, 7420 "expected a comma or a closing parenthesis")) 7421 return UNDEF; 7422 } 7423 7424 return Imm; 7425 } 7426 7427 OperandMatchResultTy 7428 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7429 7430 using namespace llvm::AMDGPU::VGPRIndexMode; 7431 7432 int64_t Imm = 0; 7433 SMLoc S = getLoc(); 7434 7435 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7436 Imm = parseGPRIdxMacro(); 7437 if (Imm == UNDEF) 7438 return MatchOperand_ParseFail; 7439 } else { 7440 if (getParser().parseAbsoluteExpression(Imm)) 7441 return MatchOperand_ParseFail; 7442 if (Imm < 0 || !isUInt<4>(Imm)) { 7443 Error(S, "invalid immediate: only 4-bit values are legal"); 7444 return MatchOperand_ParseFail; 7445 } 7446 } 7447 7448 Operands.push_back( 7449 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7450 return MatchOperand_Success; 7451 } 7452 7453 bool AMDGPUOperand::isGPRIdxMode() const { 7454 return isImmTy(ImmTyGprIdxMode); 7455 } 7456 7457 //===----------------------------------------------------------------------===// 7458 // sopp branch targets 7459 //===----------------------------------------------------------------------===// 7460 7461 OperandMatchResultTy 7462 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7463 7464 // Make sure we are not parsing something 7465 // that looks like a label or an expression but is not. 7466 // This will improve error messages. 7467 if (isRegister() || isModifier()) 7468 return MatchOperand_NoMatch; 7469 7470 if (!parseExpr(Operands)) 7471 return MatchOperand_ParseFail; 7472 7473 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7474 assert(Opr.isImm() || Opr.isExpr()); 7475 SMLoc Loc = Opr.getStartLoc(); 7476 7477 // Currently we do not support arbitrary expressions as branch targets. 7478 // Only labels and absolute expressions are accepted. 
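// Illustrative (assumed) examples of what the checks below accept or reject:
//   s_branch label       // accepted: plain symbol reference
//   s_branch 4           // accepted: absolute expression, 16-bit signed
//   s_branch label + 4   // rejected: composite expression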
7479 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7480 Error(Loc, "expected an absolute expression or a label"); 7481 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7482 Error(Loc, "expected a 16-bit signed jump offset"); 7483 } 7484 7485 return MatchOperand_Success; 7486 } 7487 7488 //===----------------------------------------------------------------------===// 7489 // Boolean holding registers 7490 //===----------------------------------------------------------------------===// 7491 7492 OperandMatchResultTy 7493 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7494 return parseReg(Operands); 7495 } 7496 7497 //===----------------------------------------------------------------------===// 7498 // mubuf 7499 //===----------------------------------------------------------------------===// 7500 7501 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7502 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7503 } 7504 7505 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7506 const OperandVector &Operands, 7507 bool IsAtomic, 7508 bool IsLds) { 7509 OptionalImmIndexMap OptionalIdx; 7510 unsigned FirstOperandIdx = 1; 7511 bool IsAtomicReturn = false; 7512 7513 if (IsAtomic) { 7514 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7515 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7516 if (!Op.isCPol()) 7517 continue; 7518 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7519 break; 7520 } 7521 7522 if (!IsAtomicReturn) { 7523 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7524 if (NewOpc != -1) 7525 Inst.setOpcode(NewOpc); 7526 } 7527 7528 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7529 SIInstrFlags::IsAtomicRet; 7530 } 7531 7532 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7533 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7534 7535 // Add the register arguments 7536 if (Op.isReg()) { 7537 Op.addRegOperands(Inst, 1); 7538 // Insert a tied src for atomic return dst. 7539 // This cannot be postponed as subsequent calls to 7540 // addImmOperands rely on correct number of MC operands. 7541 if (IsAtomicReturn && i == FirstOperandIdx) 7542 Op.addRegOperands(Inst, 1); 7543 continue; 7544 } 7545 7546 // Handle the case where soffset is an immediate 7547 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7548 Op.addImmOperands(Inst, 1); 7549 continue; 7550 } 7551 7552 // Handle tokens like 'offen' which are sometimes hard-coded into the 7553 // asm string. There are no MCInst operands for these. 
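// (Illustrative, assumed syntax: in "buffer_load_dword v0, v1, s[4:7], 0
// offen offset:16" the 'offen' token comes from the asm string itself and
// has no corresponding MCInst operand.)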
7554 if (Op.isToken()) { 7555 continue; 7556 } 7557 assert(Op.isImm()); 7558 7559 // Handle optional arguments 7560 OptionalIdx[Op.getImmTy()] = i; 7561 } 7562 7563 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7564 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7565 7566 if (!IsLds) { // tfe is not legal with lds opcodes 7567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7568 } 7569 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7570 } 7571 7572 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7573 OptionalImmIndexMap OptionalIdx; 7574 7575 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7576 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7577 7578 // Add the register arguments 7579 if (Op.isReg()) { 7580 Op.addRegOperands(Inst, 1); 7581 continue; 7582 } 7583 7584 // Handle the case where soffset is an immediate 7585 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7586 Op.addImmOperands(Inst, 1); 7587 continue; 7588 } 7589 7590 // Handle tokens like 'offen' which are sometimes hard-coded into the 7591 // asm string. There are no MCInst operands for these. 7592 if (Op.isToken()) { 7593 continue; 7594 } 7595 assert(Op.isImm()); 7596 7597 // Handle optional arguments 7598 OptionalIdx[Op.getImmTy()] = i; 7599 } 7600 7601 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7602 AMDGPUOperand::ImmTyOffset); 7603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7606 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7607 } 7608 7609 //===----------------------------------------------------------------------===// 7610 // mimg 7611 //===----------------------------------------------------------------------===// 7612 7613 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7614 bool IsAtomic) { 7615 unsigned I = 1; 7616 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7617 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7618 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7619 } 7620 7621 if (IsAtomic) { 7622 // Add src, same as dst 7623 assert(Desc.getNumDefs() == 1); 7624 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7625 } 7626 7627 OptionalImmIndexMap OptionalIdx; 7628 7629 for (unsigned E = Operands.size(); I != E; ++I) { 7630 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7631 7632 // Add the register arguments 7633 if (Op.isReg()) { 7634 Op.addRegOperands(Inst, 1); 7635 } else if (Op.isImmModifier()) { 7636 OptionalIdx[Op.getImmTy()] = I; 7637 } else if (!Op.isToken()) { 7638 llvm_unreachable("unexpected operand type"); 7639 } 7640 } 7641 7642 bool IsGFX10Plus = isGFX10Plus(); 7643 7644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7645 if (IsGFX10Plus) 7646 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7647 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7649 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7650 if (IsGFX10Plus) 7651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7652 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7653 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7654 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7655 if (!IsGFX10Plus) 7656 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7658 } 7659 7660 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7661 cvtMIMG(Inst, Operands, true); 7662 } 7663 7664 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7665 OptionalImmIndexMap OptionalIdx; 7666 bool IsAtomicReturn = false; 7667 7668 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7669 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7670 if (!Op.isCPol()) 7671 continue; 7672 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7673 break; 7674 } 7675 7676 if (!IsAtomicReturn) { 7677 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7678 if (NewOpc != -1) 7679 Inst.setOpcode(NewOpc); 7680 } 7681 7682 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7683 SIInstrFlags::IsAtomicRet; 7684 7685 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7686 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7687 7688 // Add the register arguments 7689 if (Op.isReg()) { 7690 Op.addRegOperands(Inst, 1); 7691 if (IsAtomicReturn && i == 1) 7692 Op.addRegOperands(Inst, 1); 7693 continue; 7694 } 7695 7696 // Handle the case where soffset is an immediate 7697 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7698 Op.addImmOperands(Inst, 1); 7699 continue; 7700 } 7701 7702 // Handle tokens like 'offen' which are sometimes hard-coded into the 7703 // asm string. There are no MCInst operands for these. 7704 if (Op.isToken()) { 7705 continue; 7706 } 7707 assert(Op.isImm()); 7708 7709 // Handle optional arguments 7710 OptionalIdx[Op.getImmTy()] = i; 7711 } 7712 7713 if ((int)Inst.getNumOperands() <= 7714 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7716 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7717 } 7718 7719 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7720 const OperandVector &Operands) { 7721 for (unsigned I = 1; I < Operands.size(); ++I) { 7722 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7723 if (Operand.isReg()) 7724 Operand.addRegOperands(Inst, 1); 7725 } 7726 7727 Inst.addOperand(MCOperand::createImm(1)); // a16 7728 } 7729 7730 //===----------------------------------------------------------------------===// 7731 // smrd 7732 //===----------------------------------------------------------------------===// 7733 7734 bool AMDGPUOperand::isSMRDOffset8() const { 7735 return isImm() && isUInt<8>(getImm()); 7736 } 7737 7738 bool AMDGPUOperand::isSMEMOffset() const { 7739 return isImmTy(ImmTyNone) || 7740 isImmTy(ImmTyOffset); // Offset range is checked later by validator. 7741 } 7742 7743 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7744 // 32-bit literals are only supported on CI and we only want to use them 7745 // when the offset is > 8-bits. 
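// (Illustrative, assumed example: "s_load_dword s0, s[2:3], 0x100" would need
// the 32-bit literal encoding on CI because 0x100 does not fit in 8 bits,
// while an offset of 0xff still fits the 8-bit SMRD offset field.)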
7746 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7747 } 7748 7749 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7750 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7751 } 7752 7753 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7754 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7755 } 7756 7757 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7758 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7759 } 7760 7761 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7762 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7763 } 7764 7765 //===----------------------------------------------------------------------===// 7766 // vop3 7767 //===----------------------------------------------------------------------===// 7768 7769 static bool ConvertOmodMul(int64_t &Mul) { 7770 if (Mul != 1 && Mul != 2 && Mul != 4) 7771 return false; 7772 7773 Mul >>= 1; 7774 return true; 7775 } 7776 7777 static bool ConvertOmodDiv(int64_t &Div) { 7778 if (Div == 1) { 7779 Div = 0; 7780 return true; 7781 } 7782 7783 if (Div == 2) { 7784 Div = 3; 7785 return true; 7786 } 7787 7788 return false; 7789 } 7790 7791 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7792 // This is intentional and ensures compatibility with sp3. 7793 // See bug 35397 for details. 7794 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7795 if (BoundCtrl == 0 || BoundCtrl == 1) { 7796 BoundCtrl = 1; 7797 return true; 7798 } 7799 return false; 7800 } 7801 7802 // Note: the order in this table matches the order of operands in AsmString. 7803 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7804 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7805 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7806 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7807 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7808 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7809 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7810 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7811 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7812 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7813 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7814 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7815 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7816 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7817 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7818 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7819 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7820 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7821 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7822 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7823 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7824 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7825 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7826 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7827 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7828 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7829 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7830 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7831 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7832 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7833 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7834 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7835 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7836 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7837 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7838 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7839 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7840 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7841 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7842 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7843 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7844 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}, 7845 {"wait_vdst", AMDGPUOperand::ImmTyWaitVDST, false, nullptr} 7846 }; 7847 7848 void AMDGPUAsmParser::onBeginOfFile() { 7849 if (!getParser().getStreamer().getTargetStreamer() || 7850 getSTI().getTargetTriple().getArch() == Triple::r600) 7851 return; 7852 7853 if (!getTargetStreamer().getTargetID()) 7854 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7855 7856 if (isHsaAbiVersion3AndAbove(&getSTI())) 7857 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7858 } 7859 7860 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7861 7862 OperandMatchResultTy res = parseOptionalOpr(Operands); 7863 7864 // This is a hack to enable hardcoded mandatory operands which follow 7865 // optional operands. 7866 // 7867 // Current design assumes that all operands after the first optional operand 7868 // are also optional. However implementation of some instructions violates 7869 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7870 // 7871 // To alleviate this problem, we have to (implicitly) parse extra operands 7872 // to make sure autogenerated parser of custom operands never hit hardcoded 7873 // mandatory operands. 
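// (Illustrative, assumed example: an atomic with return such as
// "flat_atomic_add v0, v[1:2], v3 glc" ends with a hardcoded 'glc' that
// follows optional operands, so the lookahead below keeps consuming
// comma-separated optional operands until nothing more matches.)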
7874 7875 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7876 if (res != MatchOperand_Success || 7877 isToken(AsmToken::EndOfStatement)) 7878 break; 7879 7880 trySkipToken(AsmToken::Comma); 7881 res = parseOptionalOpr(Operands); 7882 } 7883 7884 return res; 7885 } 7886 7887 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7888 OperandMatchResultTy res; 7889 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7890 // try to parse any optional operand here 7891 if (Op.IsBit) { 7892 res = parseNamedBit(Op.Name, Operands, Op.Type); 7893 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7894 res = parseOModOperand(Operands); 7895 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7896 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7897 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7898 res = parseSDWASel(Operands, Op.Name, Op.Type); 7899 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7900 res = parseSDWADstUnused(Operands); 7901 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7902 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7903 Op.Type == AMDGPUOperand::ImmTyNegLo || 7904 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7905 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7906 Op.ConvertResult); 7907 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7908 res = parseDim(Operands); 7909 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7910 res = parseCPol(Operands); 7911 } else { 7912 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7913 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7914 res = parseOperandArrayWithPrefix("neg", Operands, 7915 AMDGPUOperand::ImmTyBLGP, 7916 nullptr); 7917 } 7918 } 7919 if (res != MatchOperand_NoMatch) { 7920 return res; 7921 } 7922 } 7923 return MatchOperand_NoMatch; 7924 } 7925 7926 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7927 StringRef Name = getTokenStr(); 7928 if (Name == "mul") { 7929 return parseIntWithPrefix("mul", Operands, 7930 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7931 } 7932 7933 if (Name == "div") { 7934 return parseIntWithPrefix("div", Operands, 7935 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7936 } 7937 7938 return MatchOperand_NoMatch; 7939 } 7940 7941 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7942 cvtVOP3P(Inst, Operands); 7943 7944 int Opc = Inst.getOpcode(); 7945 7946 int SrcNum; 7947 const int Ops[] = { AMDGPU::OpName::src0, 7948 AMDGPU::OpName::src1, 7949 AMDGPU::OpName::src2 }; 7950 for (SrcNum = 0; 7951 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7952 ++SrcNum); 7953 assert(SrcNum > 0); 7954 7955 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7956 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7957 7958 if ((OpSel & (1 << SrcNum)) != 0) { 7959 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7960 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7961 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7962 } 7963 } 7964 7965 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7966 // 1. This operand is input modifiers 7967 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7968 // 2. This is not last operand 7969 && Desc.NumOperands > (OpNum + 1) 7970 // 3. Next operand is register class 7971 && Desc.OpInfo[OpNum + 1].RegClass != -1 7972 // 4. 
Next register is not tied to any other operand 7973 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7974 } 7975 7976 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7977 { 7978 OptionalImmIndexMap OptionalIdx; 7979 unsigned Opc = Inst.getOpcode(); 7980 7981 unsigned I = 1; 7982 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7983 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7984 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7985 } 7986 7987 for (unsigned E = Operands.size(); I != E; ++I) { 7988 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7989 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7990 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7991 } else if (Op.isInterpSlot() || 7992 Op.isInterpAttr() || 7993 Op.isAttrChan()) { 7994 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7995 } else if (Op.isImmModifier()) { 7996 OptionalIdx[Op.getImmTy()] = I; 7997 } else { 7998 llvm_unreachable("unhandled operand type"); 7999 } 8000 } 8001 8002 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 8003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 8004 } 8005 8006 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8008 } 8009 8010 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8012 } 8013 } 8014 8015 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8016 OptionalImmIndexMap &OptionalIdx) { 8017 unsigned Opc = Inst.getOpcode(); 8018 8019 unsigned I = 1; 8020 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8021 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8022 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8023 } 8024 8025 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8026 // This instruction has src modifiers 8027 for (unsigned E = Operands.size(); I != E; ++I) { 8028 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8029 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8030 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8031 } else if (Op.isImmModifier()) { 8032 OptionalIdx[Op.getImmTy()] = I; 8033 } else if (Op.isRegOrImm()) { 8034 Op.addRegOrImmOperands(Inst, 1); 8035 } else { 8036 llvm_unreachable("unhandled operand type"); 8037 } 8038 } 8039 } else { 8040 // No src modifiers 8041 for (unsigned E = Operands.size(); I != E; ++I) { 8042 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8043 if (Op.isMod()) { 8044 OptionalIdx[Op.getImmTy()] = I; 8045 } else { 8046 Op.addRegOrImmOperands(Inst, 1); 8047 } 8048 } 8049 } 8050 8051 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8052 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8053 } 8054 8055 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8056 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8057 } 8058 8059 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8060 // it has src2 register operand that is tied to dst operand 8061 // we don't allow modifiers for this operand in assembler so src2_modifiers 8062 // should be 0. 
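// (Illustrative, assumed example: for "v_mac_f32_e64 v0, v1, v2" no src2 is
// written in the assembly; the code below inserts src2_modifiers = 0 and a
// copy of the dst register as the tied src2.)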
8063 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8064 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8065 Opc == AMDGPU::V_MAC_F32_e64_vi || 8066 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8067 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8068 Opc == AMDGPU::V_MAC_F16_e64_vi || 8069 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8070 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8071 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8072 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8073 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8074 auto it = Inst.begin(); 8075 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8076 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8077 ++it; 8078 // Copy the operand to ensure it's not invalidated when Inst grows. 8079 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8080 } 8081 } 8082 8083 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8084 OptionalImmIndexMap OptionalIdx; 8085 cvtVOP3(Inst, Operands, OptionalIdx); 8086 } 8087 8088 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8089 OptionalImmIndexMap &OptIdx) { 8090 const int Opc = Inst.getOpcode(); 8091 const MCInstrDesc &Desc = MII.get(Opc); 8092 8093 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8094 8095 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8096 assert(!IsPacked); 8097 Inst.addOperand(Inst.getOperand(0)); 8098 } 8099 8100 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8101 // instruction, and then figure out where to actually put the modifiers 8102 8103 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8104 if (OpSelIdx != -1) { 8105 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8106 } 8107 8108 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8109 if (OpSelHiIdx != -1) { 8110 int DefaultVal = IsPacked ? 
-1 : 0; 8111 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8112 DefaultVal); 8113 } 8114 8115 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8116 if (NegLoIdx != -1) { 8117 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8118 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8119 } 8120 8121 const int Ops[] = { AMDGPU::OpName::src0, 8122 AMDGPU::OpName::src1, 8123 AMDGPU::OpName::src2 }; 8124 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8125 AMDGPU::OpName::src1_modifiers, 8126 AMDGPU::OpName::src2_modifiers }; 8127 8128 unsigned OpSel = 0; 8129 unsigned OpSelHi = 0; 8130 unsigned NegLo = 0; 8131 unsigned NegHi = 0; 8132 8133 if (OpSelIdx != -1) 8134 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8135 8136 if (OpSelHiIdx != -1) 8137 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8138 8139 if (NegLoIdx != -1) { 8140 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8141 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8142 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8143 } 8144 8145 for (int J = 0; J < 3; ++J) { 8146 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8147 if (OpIdx == -1) 8148 break; 8149 8150 uint32_t ModVal = 0; 8151 8152 if ((OpSel & (1 << J)) != 0) 8153 ModVal |= SISrcMods::OP_SEL_0; 8154 8155 if ((OpSelHi & (1 << J)) != 0) 8156 ModVal |= SISrcMods::OP_SEL_1; 8157 8158 if ((NegLo & (1 << J)) != 0) 8159 ModVal |= SISrcMods::NEG; 8160 8161 if ((NegHi & (1 << J)) != 0) 8162 ModVal |= SISrcMods::NEG_HI; 8163 8164 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8165 8166 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8167 } 8168 } 8169 8170 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8171 OptionalImmIndexMap OptIdx; 8172 cvtVOP3(Inst, Operands, OptIdx); 8173 cvtVOP3P(Inst, Operands, OptIdx); 8174 } 8175 8176 //===----------------------------------------------------------------------===// 8177 // dpp 8178 //===----------------------------------------------------------------------===// 8179 8180 bool AMDGPUOperand::isDPP8() const { 8181 return isImmTy(ImmTyDPP8); 8182 } 8183 8184 bool AMDGPUOperand::isDPPCtrl() const { 8185 using namespace AMDGPU::DPP; 8186 8187 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8188 if (result) { 8189 int64_t Imm = getImm(); 8190 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8191 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8192 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8193 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8194 (Imm == DppCtrl::WAVE_SHL1) || 8195 (Imm == DppCtrl::WAVE_ROL1) || 8196 (Imm == DppCtrl::WAVE_SHR1) || 8197 (Imm == DppCtrl::WAVE_ROR1) || 8198 (Imm == DppCtrl::ROW_MIRROR) || 8199 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8200 (Imm == DppCtrl::BCAST15) || 8201 (Imm == DppCtrl::BCAST31) || 8202 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8203 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8204 } 8205 return false; 8206 } 8207 8208 //===----------------------------------------------------------------------===// 8209 // mAI 8210 //===----------------------------------------------------------------------===// 8211 8212 bool AMDGPUOperand::isBLGP() const { 8213 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8214 } 8215 8216 bool 
AMDGPUOperand::isCBSZ() const { 8217 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8218 } 8219 8220 bool AMDGPUOperand::isABID() const { 8221 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8222 } 8223 8224 bool AMDGPUOperand::isS16Imm() const { 8225 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8226 } 8227 8228 bool AMDGPUOperand::isU16Imm() const { 8229 return isImm() && isUInt<16>(getImm()); 8230 } 8231 8232 //===----------------------------------------------------------------------===// 8233 // dim 8234 //===----------------------------------------------------------------------===// 8235 8236 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8237 // We want to allow "dim:1D" etc., 8238 // but the initial 1 is tokenized as an integer. 8239 std::string Token; 8240 if (isToken(AsmToken::Integer)) { 8241 SMLoc Loc = getToken().getEndLoc(); 8242 Token = std::string(getTokenStr()); 8243 lex(); 8244 if (getLoc() != Loc) 8245 return false; 8246 } 8247 8248 StringRef Suffix; 8249 if (!parseId(Suffix)) 8250 return false; 8251 Token += Suffix; 8252 8253 StringRef DimId = Token; 8254 if (DimId.startswith("SQ_RSRC_IMG_")) 8255 DimId = DimId.drop_front(12); 8256 8257 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8258 if (!DimInfo) 8259 return false; 8260 8261 Encoding = DimInfo->Encoding; 8262 return true; 8263 } 8264 8265 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8266 if (!isGFX10Plus()) 8267 return MatchOperand_NoMatch; 8268 8269 SMLoc S = getLoc(); 8270 8271 if (!trySkipId("dim", AsmToken::Colon)) 8272 return MatchOperand_NoMatch; 8273 8274 unsigned Encoding; 8275 SMLoc Loc = getLoc(); 8276 if (!parseDimId(Encoding)) { 8277 Error(Loc, "invalid dim value"); 8278 return MatchOperand_ParseFail; 8279 } 8280 8281 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8282 AMDGPUOperand::ImmTyDim)); 8283 return MatchOperand_Success; 8284 } 8285 8286 //===----------------------------------------------------------------------===// 8287 // dpp 8288 //===----------------------------------------------------------------------===// 8289 8290 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8291 SMLoc S = getLoc(); 8292 8293 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8294 return MatchOperand_NoMatch; 8295 8296 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8297 8298 int64_t Sels[8]; 8299 8300 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8301 return MatchOperand_ParseFail; 8302 8303 for (size_t i = 0; i < 8; ++i) { 8304 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8305 return MatchOperand_ParseFail; 8306 8307 SMLoc Loc = getLoc(); 8308 if (getParser().parseAbsoluteExpression(Sels[i])) 8309 return MatchOperand_ParseFail; 8310 if (0 > Sels[i] || 7 < Sels[i]) { 8311 Error(Loc, "expected a 3-bit value"); 8312 return MatchOperand_ParseFail; 8313 } 8314 } 8315 8316 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8317 return MatchOperand_ParseFail; 8318 8319 unsigned DPP8 = 0; 8320 for (size_t i = 0; i < 8; ++i) 8321 DPP8 |= (Sels[i] << (i * 3)); 8322 8323 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8324 return MatchOperand_Success; 8325 } 8326 8327 bool 8328 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8329 const OperandVector &Operands) { 8330 if (Ctrl == "row_newbcast") 8331 return isGFX90A(); 8332 8333 if (Ctrl == "row_share" || 8334 Ctrl 
== "row_xmask") 8335 return isGFX10Plus(); 8336 8337 if (Ctrl == "wave_shl" || 8338 Ctrl == "wave_shr" || 8339 Ctrl == "wave_rol" || 8340 Ctrl == "wave_ror" || 8341 Ctrl == "row_bcast") 8342 return isVI() || isGFX9(); 8343 8344 return Ctrl == "row_mirror" || 8345 Ctrl == "row_half_mirror" || 8346 Ctrl == "quad_perm" || 8347 Ctrl == "row_shl" || 8348 Ctrl == "row_shr" || 8349 Ctrl == "row_ror"; 8350 } 8351 8352 int64_t 8353 AMDGPUAsmParser::parseDPPCtrlPerm() { 8354 // quad_perm:[%d,%d,%d,%d] 8355 8356 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8357 return -1; 8358 8359 int64_t Val = 0; 8360 for (int i = 0; i < 4; ++i) { 8361 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8362 return -1; 8363 8364 int64_t Temp; 8365 SMLoc Loc = getLoc(); 8366 if (getParser().parseAbsoluteExpression(Temp)) 8367 return -1; 8368 if (Temp < 0 || Temp > 3) { 8369 Error(Loc, "expected a 2-bit value"); 8370 return -1; 8371 } 8372 8373 Val += (Temp << i * 2); 8374 } 8375 8376 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8377 return -1; 8378 8379 return Val; 8380 } 8381 8382 int64_t 8383 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8384 using namespace AMDGPU::DPP; 8385 8386 // sel:%d 8387 8388 int64_t Val; 8389 SMLoc Loc = getLoc(); 8390 8391 if (getParser().parseAbsoluteExpression(Val)) 8392 return -1; 8393 8394 struct DppCtrlCheck { 8395 int64_t Ctrl; 8396 int Lo; 8397 int Hi; 8398 }; 8399 8400 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8401 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8402 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8403 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8404 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8405 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8406 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8407 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8408 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8409 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8410 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8411 .Default({-1, 0, 0}); 8412 8413 bool Valid; 8414 if (Check.Ctrl == -1) { 8415 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8416 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8417 } else { 8418 Valid = Check.Lo <= Val && Val <= Check.Hi; 8419 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8420 } 8421 8422 if (!Valid) { 8423 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8424 return -1; 8425 } 8426 8427 return Val; 8428 } 8429 8430 OperandMatchResultTy 8431 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8432 using namespace AMDGPU::DPP; 8433 8434 if (!isToken(AsmToken::Identifier) || 8435 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8436 return MatchOperand_NoMatch; 8437 8438 SMLoc S = getLoc(); 8439 int64_t Val = -1; 8440 StringRef Ctrl; 8441 8442 parseId(Ctrl); 8443 8444 if (Ctrl == "row_mirror") { 8445 Val = DppCtrl::ROW_MIRROR; 8446 } else if (Ctrl == "row_half_mirror") { 8447 Val = DppCtrl::ROW_HALF_MIRROR; 8448 } else { 8449 if (skipToken(AsmToken::Colon, "expected a colon")) { 8450 if (Ctrl == "quad_perm") { 8451 Val = parseDPPCtrlPerm(); 8452 } else { 8453 Val = parseDPPCtrlSel(Ctrl); 8454 } 8455 } 8456 } 8457 8458 if (Val == -1) 8459 return MatchOperand_ParseFail; 8460 8461 Operands.push_back( 8462 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8463 return MatchOperand_Success; 8464 } 8465 8466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8467 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8468 } 8469 8470 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8471 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8472 } 8473 8474 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8475 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8476 } 8477 8478 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8479 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8480 } 8481 8482 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8483 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8484 } 8485 8486 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8487 OptionalImmIndexMap OptionalIdx; 8488 8489 unsigned Opc = Inst.getOpcode(); 8490 bool HasModifiers = 8491 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8492 unsigned I = 1; 8493 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8494 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8495 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8496 } 8497 8498 int Fi = 0; 8499 for (unsigned E = Operands.size(); I != E; ++I) { 8500 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8501 MCOI::TIED_TO); 8502 if (TiedTo != -1) { 8503 assert((unsigned)TiedTo < Inst.getNumOperands()); 8504 // handle tied old or src2 for MAC instructions 8505 Inst.addOperand(Inst.getOperand(TiedTo)); 8506 } 8507 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8508 // Add the register arguments 8509 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8510 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8511 // Skip it. 
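// (Illustrative, assumed example: in "v_add_u32_dpp v0, vcc, v1, v2
// quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf" the written 'vcc' is parsed
// as a register but deliberately not added to the MCInst here.)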
8512 continue; 8513 } 8514 8515 if (IsDPP8) { 8516 if (Op.isDPP8()) { 8517 Op.addImmOperands(Inst, 1); 8518 } else if (HasModifiers && 8519 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8520 Op.addRegWithFPInputModsOperands(Inst, 2); 8521 } else if (Op.isFI()) { 8522 Fi = Op.getImm(); 8523 } else if (Op.isReg()) { 8524 Op.addRegOperands(Inst, 1); 8525 } else { 8526 llvm_unreachable("Invalid operand type"); 8527 } 8528 } else { 8529 if (HasModifiers && 8530 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8531 Op.addRegWithFPInputModsOperands(Inst, 2); 8532 } else if (Op.isReg()) { 8533 Op.addRegOperands(Inst, 1); 8534 } else if (Op.isDPPCtrl()) { 8535 Op.addImmOperands(Inst, 1); 8536 } else if (Op.isImm()) { 8537 // Handle optional arguments 8538 OptionalIdx[Op.getImmTy()] = I; 8539 } else { 8540 llvm_unreachable("Invalid operand type"); 8541 } 8542 } 8543 } 8544 8545 if (IsDPP8) { 8546 using namespace llvm::AMDGPU::DPP; 8547 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8548 } else { 8549 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8550 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8551 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8552 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8553 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8554 } 8555 } 8556 } 8557 8558 //===----------------------------------------------------------------------===// 8559 // sdwa 8560 //===----------------------------------------------------------------------===// 8561 8562 OperandMatchResultTy 8563 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8564 AMDGPUOperand::ImmTy Type) { 8565 using namespace llvm::AMDGPU::SDWA; 8566 8567 SMLoc S = getLoc(); 8568 StringRef Value; 8569 OperandMatchResultTy res; 8570 8571 SMLoc StringLoc; 8572 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8573 if (res != MatchOperand_Success) { 8574 return res; 8575 } 8576 8577 int64_t Int; 8578 Int = StringSwitch<int64_t>(Value) 8579 .Case("BYTE_0", SdwaSel::BYTE_0) 8580 .Case("BYTE_1", SdwaSel::BYTE_1) 8581 .Case("BYTE_2", SdwaSel::BYTE_2) 8582 .Case("BYTE_3", SdwaSel::BYTE_3) 8583 .Case("WORD_0", SdwaSel::WORD_0) 8584 .Case("WORD_1", SdwaSel::WORD_1) 8585 .Case("DWORD", SdwaSel::DWORD) 8586 .Default(0xffffffff); 8587 8588 if (Int == 0xffffffff) { 8589 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8590 return MatchOperand_ParseFail; 8591 } 8592 8593 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8594 return MatchOperand_Success; 8595 } 8596 8597 OperandMatchResultTy 8598 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8599 using namespace llvm::AMDGPU::SDWA; 8600 8601 SMLoc S = getLoc(); 8602 StringRef Value; 8603 OperandMatchResultTy res; 8604 8605 SMLoc StringLoc; 8606 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8607 if (res != MatchOperand_Success) { 8608 return res; 8609 } 8610 8611 int64_t Int; 8612 Int = StringSwitch<int64_t>(Value) 8613 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8614 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8615 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8616 .Default(0xffffffff); 8617 8618 if (Int == 0xffffffff) { 8619 Error(StringLoc, "invalid dst_unused value"); 8620 return MatchOperand_ParseFail; 8621 } 8622 8623 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8624 return MatchOperand_Success; 8625 } 8626 8627 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8628 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8629 } 8630 8631 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8632 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8633 } 8634 8635 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8636 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8637 } 8638 8639 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8640 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8641 } 8642 8643 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8644 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8645 } 8646 8647 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8648 uint64_t BasicInstType, 8649 bool SkipDstVcc, 8650 bool SkipSrcVcc) { 8651 using namespace llvm::AMDGPU::SDWA; 8652 8653 OptionalImmIndexMap OptionalIdx; 8654 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8655 bool SkippedVcc = false; 8656 8657 unsigned I = 1; 8658 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8659 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8660 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8661 } 8662 8663 for (unsigned E = Operands.size(); I != E; ++I) { 8664 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8665 if (SkipVcc && !SkippedVcc && Op.isReg() && 8666 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8667 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8668 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8669 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8670 // Skip VCC only if we didn't skip it on previous iteration. 8671 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
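// (Illustrative reading of the operand counts checked below, following the
// layout described above: with "v_addc_u32_sdwa v1, vcc, v2, v3, vcc" the
// dst-carry 'vcc' is reached while the MCInst holds only vdst (1 operand),
// and the src-carry 'vcc' once vdst plus two modifier+src pairs are present
// (5 operands).)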
8672 if (BasicInstType == SIInstrFlags::VOP2 &&
8673 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8674 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8675 SkippedVcc = true;
8676 continue;
8677 } else if (BasicInstType == SIInstrFlags::VOPC &&
8678 Inst.getNumOperands() == 0) {
8679 SkippedVcc = true;
8680 continue;
8681 }
8682 }
8683 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8684 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8685 } else if (Op.isImm()) {
8686 // Handle optional arguments
8687 OptionalIdx[Op.getImmTy()] = I;
8688 } else {
8689 llvm_unreachable("Invalid operand type");
8690 }
8691 SkippedVcc = false;
8692 }
8693
8694 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8695 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8696 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8697 // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8698 switch (BasicInstType) {
8699 case SIInstrFlags::VOP1:
8700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8701 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8702 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8703 }
8704 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8705 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8706 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8707 break;
8708
8709 case SIInstrFlags::VOP2:
8710 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8711 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8712 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8713 }
8714 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8716 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8717 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8718 break;
8719
8720 case SIInstrFlags::VOPC:
8721 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8722 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8723 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8724 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8725 break;
8726
8727 default:
8728 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 8729 } 8730 } 8731 8732 // special case v_mac_{f16, f32}: 8733 // it has src2 register operand that is tied to dst operand 8734 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8735 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8736 auto it = Inst.begin(); 8737 std::advance( 8738 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8739 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8740 } 8741 } 8742 8743 //===----------------------------------------------------------------------===// 8744 // mAI 8745 //===----------------------------------------------------------------------===// 8746 8747 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8748 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8749 } 8750 8751 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8752 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8753 } 8754 8755 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8756 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8757 } 8758 8759 /// Force static initialization. 8760 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8761 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8762 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8763 } 8764 8765 #define GET_REGISTER_MATCHER 8766 #define GET_MATCHER_IMPLEMENTATION 8767 #define GET_MNEMONIC_SPELL_CHECKER 8768 #define GET_MNEMONIC_CHECKER 8769 #include "AMDGPUGenAsmMatcher.inc" 8770 8771 // This function should be defined after auto-generated include so that we have 8772 // MatchClassKind enum defined 8773 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8774 unsigned Kind) { 8775 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8776 // But MatchInstructionImpl() expects to meet token and fails to validate 8777 // operand. This method checks if we are given immediate operand but expect to 8778 // get corresponding token. 8779 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8780 switch (Kind) { 8781 case MCK_addr64: 8782 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8783 case MCK_gds: 8784 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8785 case MCK_lds: 8786 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8787 case MCK_idxen: 8788 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8789 case MCK_offen: 8790 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8791 case MCK_SSrcB32: 8792 // When operands have expression values, they will return true for isToken, 8793 // because it is not possible to distinguish between a token and an 8794 // expression at parse time. MatchInstructionImpl() will always try to 8795 // match an operand as a token, when isToken returns true, and when the 8796 // name of the expression is not a valid token, the match will fail, 8797 // so we need to handle it here. 8798 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8799 case MCK_SSrcF32: 8800 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8801 case MCK_SoppBrTarget: 8802 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8803 case MCK_VReg32OrOff: 8804 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8805 case MCK_InterpSlot: 8806 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8807 case MCK_Attr: 8808 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8809 case MCK_AttrChan: 8810 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8811 case MCK_ImmSMEMOffset: 8812 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8813 case MCK_SReg_64: 8814 case MCK_SReg_64_XEXEC: 8815 // Null is defined as a 32-bit register but 8816 // it should also be enabled with 64-bit operands. 8817 // The following code enables it for SReg_64 operands 8818 // used as source and destination. Remaining source 8819 // operands are handled in isInlinableImm. 8820 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8821 default: 8822 return Match_InvalidOperand; 8823 } 8824 } 8825 8826 //===----------------------------------------------------------------------===// 8827 // endpgm 8828 //===----------------------------------------------------------------------===// 8829 8830 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8831 SMLoc S = getLoc(); 8832 int64_t Imm = 0; 8833 8834 if (!parseExpr(Imm)) { 8835 // The operand is optional, if not present default to 0 8836 Imm = 0; 8837 } 8838 8839 if (!isUInt<16>(Imm)) { 8840 Error(S, "expected a 16-bit value"); 8841 return MatchOperand_ParseFail; 8842 } 8843 8844 Operands.push_back( 8845 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8846 return MatchOperand_Success; 8847 } 8848 8849 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8850 8851 //===----------------------------------------------------------------------===// 8852 // LDSDIR 8853 //===----------------------------------------------------------------------===// 8854 8855 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultWaitVDST() const { 8856 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyWaitVDST); 8857 } 8858 8859 bool AMDGPUOperand::isWaitVDST() const { 8860 return isImmTy(ImmTyWaitVDST) && isUInt<4>(getImm()); 8861 } 8862