//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }
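
    // Illustrative note (editorial, not from the original source): for a
    // source operand written as "-|v0|", the parser records Abs and Neg, so
    // getModifiersOperand() below yields SISrcMods::ABS | SISrcMods::NEG;
    // that value is emitted as the src*_modifiers immediate right before the
    // register operand (see addRegOrImmWithInputModsOperands further down).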

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() ||
           isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

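  // Note on the surrounding is*Src*() predicates (editorial, not in the
  // original source): the register-class ID fixes the operand width (e.g.
  // VGPR_32, VReg_128, AReg_512) while the MVT selects the element type used
  // for the inline-constant check; the packed V2* variants defer to the
  // corresponding scalar predicates.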
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }
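
  // Illustrative example (editorial, not from the original source): parsing
  // s[4:7] leads to usesRegister(IS_SGPR, /*DwordRegIndex=*/4,
  // /*RegWidth=*/128). The highest dword touched is 4 + ceil(128/32) - 1 = 7,
  // so SgprIndexUnusedMin becomes 8 and .kernel.sgpr_count is updated to 8.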

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace
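
// Editorial sketch (assumed usage, not from the original source): the helpers
// below map an operand's byte size or MVT to APFloat semantics, e.g.
// getFltSemantics(MVT::f16) returns &APFloat::IEEEhalf(). isInlinableImm()
// relies on this, via canLosslesslyConvertToFPType(), to check that a parsed
// fp literal fits the operand type without overflow or underflow.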
1771 static const fltSemantics *getFltSemantics(unsigned Size) { 1772 switch (Size) { 1773 case 4: 1774 return &APFloat::IEEEsingle(); 1775 case 8: 1776 return &APFloat::IEEEdouble(); 1777 case 2: 1778 return &APFloat::IEEEhalf(); 1779 default: 1780 llvm_unreachable("unsupported fp type"); 1781 } 1782 } 1783 1784 static const fltSemantics *getFltSemantics(MVT VT) { 1785 return getFltSemantics(VT.getSizeInBits() / 8); 1786 } 1787 1788 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1789 switch (OperandType) { 1790 case AMDGPU::OPERAND_REG_IMM_INT32: 1791 case AMDGPU::OPERAND_REG_IMM_FP32: 1792 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1797 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1798 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1799 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1800 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1801 case AMDGPU::OPERAND_KIMM32: 1802 return &APFloat::IEEEsingle(); 1803 case AMDGPU::OPERAND_REG_IMM_INT64: 1804 case AMDGPU::OPERAND_REG_IMM_FP64: 1805 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1806 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1808 return &APFloat::IEEEdouble(); 1809 case AMDGPU::OPERAND_REG_IMM_INT16: 1810 case AMDGPU::OPERAND_REG_IMM_FP16: 1811 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1812 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1813 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1814 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1816 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1817 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1820 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1821 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1822 case AMDGPU::OPERAND_KIMM16: 1823 return &APFloat::IEEEhalf(); 1824 default: 1825 llvm_unreachable("unsupported fp type"); 1826 } 1827 } 1828 1829 //===----------------------------------------------------------------------===// 1830 // Operand 1831 //===----------------------------------------------------------------------===// 1832 1833 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1834 bool Lost; 1835 1836 // Convert literal to single precision 1837 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1838 APFloat::rmNearestTiesToEven, 1839 &Lost); 1840 // We allow precision lost but not overflow or underflow 1841 if (Status != APFloat::opOK && 1842 Lost && 1843 ((Status & APFloat::opOverflow) != 0 || 1844 (Status & APFloat::opUnderflow) != 0)) { 1845 return false; 1846 } 1847 1848 return true; 1849 } 1850 1851 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1852 return isUIntN(Size, Val) || isIntN(Size, Val); 1853 } 1854 1855 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1856 if (VT.getScalarType() == MVT::i16) { 1857 // FP immediate values are broken. 1858 return isInlinableIntLiteral(Val); 1859 } 1860 1861 // f16/v2f16 operands work correctly for all values. 1862 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1863 } 1864 1865 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1866 1867 // This is a hack to enable named inline values like 1868 // shared_base with both 32-bit and 64-bit operands. 1869 // Note that these values are defined as 1870 // 32-bit operands only. 
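  // For example, a named inline value such as 'src_shared_base' is
  // accepted here for both 32-bit and 64-bit source operands, even though
  // the register itself is defined as a 32-bit operand.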
1871 if (isInlineValue()) { 1872 return true; 1873 } 1874 1875 if (!isImmTy(ImmTyNone)) { 1876 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1877 return false; 1878 } 1879 // TODO: We should avoid using host float here. It would be better to 1880 // check the float bit values which is what a few other places do. 1881 // We've had bot failures before due to weird NaN support on mips hosts. 1882 1883 APInt Literal(64, Imm.Val); 1884 1885 if (Imm.IsFPImm) { // We got fp literal token 1886 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1887 return AMDGPU::isInlinableLiteral64(Imm.Val, 1888 AsmParser->hasInv2PiInlineImm()); 1889 } 1890 1891 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1892 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1893 return false; 1894 1895 if (type.getScalarSizeInBits() == 16) { 1896 return isInlineableLiteralOp16( 1897 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1898 type, AsmParser->hasInv2PiInlineImm()); 1899 } 1900 1901 // Check if single precision literal is inlinable 1902 return AMDGPU::isInlinableLiteral32( 1903 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1904 AsmParser->hasInv2PiInlineImm()); 1905 } 1906 1907 // We got int literal token. 1908 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1909 return AMDGPU::isInlinableLiteral64(Imm.Val, 1910 AsmParser->hasInv2PiInlineImm()); 1911 } 1912 1913 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1914 return false; 1915 } 1916 1917 if (type.getScalarSizeInBits() == 16) { 1918 return isInlineableLiteralOp16( 1919 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1920 type, AsmParser->hasInv2PiInlineImm()); 1921 } 1922 1923 return AMDGPU::isInlinableLiteral32( 1924 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1925 AsmParser->hasInv2PiInlineImm()); 1926 } 1927 1928 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1929 // Check that this immediate can be added as literal 1930 if (!isImmTy(ImmTyNone)) { 1931 return false; 1932 } 1933 1934 if (!Imm.IsFPImm) { 1935 // We got int literal token. 1936 1937 if (type == MVT::f64 && hasFPModifiers()) { 1938 // Cannot apply fp modifiers to int literals preserving the same semantics 1939 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1940 // disable these cases. 1941 return false; 1942 } 1943 1944 unsigned Size = type.getSizeInBits(); 1945 if (Size == 64) 1946 Size = 32; 1947 1948 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1949 // types. 1950 return isSafeTruncation(Imm.Val, Size); 1951 } 1952 1953 // We got fp literal token 1954 if (type == MVT::f64) { // Expected 64-bit fp operand 1955 // We would set low 64-bits of literal to zeroes but we accept this literals 1956 return true; 1957 } 1958 1959 if (type == MVT::i64) { // Expected 64-bit int operand 1960 // We don't allow fp literals in 64-bit integer instructions. It is 1961 // unclear how we should encode them. 1962 return false; 1963 } 1964 1965 // We allow fp literals with f16x2 operands assuming that the specified 1966 // literal goes into the lower half and the upper half is zero. We also 1967 // require that the literal may be losslessly converted to f16. 1968 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1969 (type == MVT::v2i16)? MVT::i16 : 1970 (type == MVT::v2f32)? 
MVT::f32 : type; 1971 1972 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1973 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1974 } 1975 1976 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1977 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1978 } 1979 1980 bool AMDGPUOperand::isVRegWithInputMods() const { 1981 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1982 // GFX90A allows DPP on 64-bit operands. 1983 (isRegClass(AMDGPU::VReg_64RegClassID) && 1984 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1985 } 1986 1987 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1988 if (AsmParser->isVI()) 1989 return isVReg32(); 1990 else if (AsmParser->isGFX9Plus()) 1991 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1992 else 1993 return false; 1994 } 1995 1996 bool AMDGPUOperand::isSDWAFP16Operand() const { 1997 return isSDWAOperand(MVT::f16); 1998 } 1999 2000 bool AMDGPUOperand::isSDWAFP32Operand() const { 2001 return isSDWAOperand(MVT::f32); 2002 } 2003 2004 bool AMDGPUOperand::isSDWAInt16Operand() const { 2005 return isSDWAOperand(MVT::i16); 2006 } 2007 2008 bool AMDGPUOperand::isSDWAInt32Operand() const { 2009 return isSDWAOperand(MVT::i32); 2010 } 2011 2012 bool AMDGPUOperand::isBoolReg() const { 2013 auto FB = AsmParser->getFeatureBits(); 2014 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2015 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2016 } 2017 2018 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2019 { 2020 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2021 assert(Size == 2 || Size == 4 || Size == 8); 2022 2023 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2024 2025 if (Imm.Mods.Abs) { 2026 Val &= ~FpSignMask; 2027 } 2028 if (Imm.Mods.Neg) { 2029 Val ^= FpSignMask; 2030 } 2031 2032 return Val; 2033 } 2034 2035 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2036 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2037 Inst.getNumOperands())) { 2038 addLiteralImmOperand(Inst, Imm.Val, 2039 ApplyModifiers & 2040 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2041 } else { 2042 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2043 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2044 setImmKindNone(); 2045 } 2046 } 2047 2048 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2049 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2050 auto OpNum = Inst.getNumOperands(); 2051 // Check that this operand accepts literals 2052 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2053 2054 if (ApplyModifiers) { 2055 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2056 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2057 Val = applyInputFPModifiers(Val, Size); 2058 } 2059 2060 APInt Literal(64, Val); 2061 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2062 2063 if (Imm.IsFPImm) { // We got fp literal token 2064 switch (OpTy) { 2065 case AMDGPU::OPERAND_REG_IMM_INT64: 2066 case AMDGPU::OPERAND_REG_IMM_FP64: 2067 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2068 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2069 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2070 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2071 AsmParser->hasInv2PiInlineImm())) { 2072 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2073 setImmKindConst(); 2074 return; 2075 } 2076 2077 // Non-inlineable 2078 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2079 // For fp operands we check if low 32 bits are zeros 2080 if (Literal.getLoBits(32) != 0) { 2081 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2082 "Can't encode literal as exact 64-bit floating-point operand. " 2083 "Low 32-bits will be set to zero"); 2084 } 2085 2086 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2087 setImmKindLiteral(); 2088 return; 2089 } 2090 2091 // We don't allow fp literals in 64-bit integer instructions. It is 2092 // unclear how we should encode them. This case should be checked earlier 2093 // in predicate methods (isLiteralImm()) 2094 llvm_unreachable("fp literal in 64-bit integer instruction."); 2095 2096 case AMDGPU::OPERAND_REG_IMM_INT32: 2097 case AMDGPU::OPERAND_REG_IMM_FP32: 2098 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2099 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2100 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2102 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2103 case AMDGPU::OPERAND_REG_IMM_INT16: 2104 case AMDGPU::OPERAND_REG_IMM_FP16: 2105 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2106 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2107 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2108 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2109 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2110 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2111 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2112 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2114 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2115 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2116 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2117 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2118 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2119 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2120 case AMDGPU::OPERAND_KIMM32: 2121 case AMDGPU::OPERAND_KIMM16: { 2122 bool lost; 2123 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2124 // Convert literal to single precision 2125 FPLiteral.convert(*getOpFltSemantics(OpTy), 2126 APFloat::rmNearestTiesToEven, &lost); 2127 // We allow precision lost but not overflow or underflow. This should be 2128 // checked earlier in isLiteralImm() 2129 2130 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2131 Inst.addOperand(MCOperand::createImm(ImmVal)); 2132 setImmKindLiteral(); 2133 return; 2134 } 2135 default: 2136 llvm_unreachable("invalid operand size"); 2137 } 2138 2139 return; 2140 } 2141 2142 // We got int literal token. 2143 // Only sign extend inline immediates. 
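  // For example, with a 32-bit operand type, -1 fits the inline-constant
  // range and is emitted as an inline constant, while a value such as
  // 0x12345678 is not inlinable and is emitted as a 32-bit literal.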
2144 switch (OpTy) { 2145 case AMDGPU::OPERAND_REG_IMM_INT32: 2146 case AMDGPU::OPERAND_REG_IMM_FP32: 2147 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2148 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2149 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2150 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2151 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2152 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2153 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2154 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2155 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2156 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2158 if (isSafeTruncation(Val, 32) && 2159 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2160 AsmParser->hasInv2PiInlineImm())) { 2161 Inst.addOperand(MCOperand::createImm(Val)); 2162 setImmKindConst(); 2163 return; 2164 } 2165 2166 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2167 setImmKindLiteral(); 2168 return; 2169 2170 case AMDGPU::OPERAND_REG_IMM_INT64: 2171 case AMDGPU::OPERAND_REG_IMM_FP64: 2172 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2173 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2174 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2175 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2176 Inst.addOperand(MCOperand::createImm(Val)); 2177 setImmKindConst(); 2178 return; 2179 } 2180 2181 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2182 setImmKindLiteral(); 2183 return; 2184 2185 case AMDGPU::OPERAND_REG_IMM_INT16: 2186 case AMDGPU::OPERAND_REG_IMM_FP16: 2187 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2188 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2189 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2190 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2191 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2192 if (isSafeTruncation(Val, 16) && 2193 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2194 AsmParser->hasInv2PiInlineImm())) { 2195 Inst.addOperand(MCOperand::createImm(Val)); 2196 setImmKindConst(); 2197 return; 2198 } 2199 2200 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2201 setImmKindLiteral(); 2202 return; 2203 2204 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2205 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2206 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2207 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2208 assert(isSafeTruncation(Val, 16)); 2209 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2210 AsmParser->hasInv2PiInlineImm())); 2211 2212 Inst.addOperand(MCOperand::createImm(Val)); 2213 return; 2214 } 2215 case AMDGPU::OPERAND_KIMM32: 2216 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2217 setImmKindNone(); 2218 return; 2219 case AMDGPU::OPERAND_KIMM16: 2220 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2221 setImmKindNone(); 2222 return; 2223 default: 2224 llvm_unreachable("invalid operand size"); 2225 } 2226 } 2227 2228 template <unsigned Bitwidth> 2229 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2230 APInt Literal(64, Imm.Val); 2231 setImmKindNone(); 2232 2233 if (!Imm.IsFPImm) { 2234 // We got int literal token. 
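    // An integer literal is simply truncated to its low Bitwidth bits.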
2235 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2236 return; 2237 } 2238 2239 bool Lost; 2240 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2241 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2242 APFloat::rmNearestTiesToEven, &Lost); 2243 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2244 } 2245 2246 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2247 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2248 } 2249 2250 static bool isInlineValue(unsigned Reg) { 2251 switch (Reg) { 2252 case AMDGPU::SRC_SHARED_BASE: 2253 case AMDGPU::SRC_SHARED_LIMIT: 2254 case AMDGPU::SRC_PRIVATE_BASE: 2255 case AMDGPU::SRC_PRIVATE_LIMIT: 2256 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2257 return true; 2258 case AMDGPU::SRC_VCCZ: 2259 case AMDGPU::SRC_EXECZ: 2260 case AMDGPU::SRC_SCC: 2261 return true; 2262 case AMDGPU::SGPR_NULL: 2263 return true; 2264 default: 2265 return false; 2266 } 2267 } 2268 2269 bool AMDGPUOperand::isInlineValue() const { 2270 return isRegKind() && ::isInlineValue(getReg()); 2271 } 2272 2273 //===----------------------------------------------------------------------===// 2274 // AsmParser 2275 //===----------------------------------------------------------------------===// 2276 2277 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2278 if (Is == IS_VGPR) { 2279 switch (RegWidth) { 2280 default: return -1; 2281 case 32: 2282 return AMDGPU::VGPR_32RegClassID; 2283 case 64: 2284 return AMDGPU::VReg_64RegClassID; 2285 case 96: 2286 return AMDGPU::VReg_96RegClassID; 2287 case 128: 2288 return AMDGPU::VReg_128RegClassID; 2289 case 160: 2290 return AMDGPU::VReg_160RegClassID; 2291 case 192: 2292 return AMDGPU::VReg_192RegClassID; 2293 case 224: 2294 return AMDGPU::VReg_224RegClassID; 2295 case 256: 2296 return AMDGPU::VReg_256RegClassID; 2297 case 512: 2298 return AMDGPU::VReg_512RegClassID; 2299 case 1024: 2300 return AMDGPU::VReg_1024RegClassID; 2301 } 2302 } else if (Is == IS_TTMP) { 2303 switch (RegWidth) { 2304 default: return -1; 2305 case 32: 2306 return AMDGPU::TTMP_32RegClassID; 2307 case 64: 2308 return AMDGPU::TTMP_64RegClassID; 2309 case 128: 2310 return AMDGPU::TTMP_128RegClassID; 2311 case 256: 2312 return AMDGPU::TTMP_256RegClassID; 2313 case 512: 2314 return AMDGPU::TTMP_512RegClassID; 2315 } 2316 } else if (Is == IS_SGPR) { 2317 switch (RegWidth) { 2318 default: return -1; 2319 case 32: 2320 return AMDGPU::SGPR_32RegClassID; 2321 case 64: 2322 return AMDGPU::SGPR_64RegClassID; 2323 case 96: 2324 return AMDGPU::SGPR_96RegClassID; 2325 case 128: 2326 return AMDGPU::SGPR_128RegClassID; 2327 case 160: 2328 return AMDGPU::SGPR_160RegClassID; 2329 case 192: 2330 return AMDGPU::SGPR_192RegClassID; 2331 case 224: 2332 return AMDGPU::SGPR_224RegClassID; 2333 case 256: 2334 return AMDGPU::SGPR_256RegClassID; 2335 case 512: 2336 return AMDGPU::SGPR_512RegClassID; 2337 } 2338 } else if (Is == IS_AGPR) { 2339 switch (RegWidth) { 2340 default: return -1; 2341 case 32: 2342 return AMDGPU::AGPR_32RegClassID; 2343 case 64: 2344 return AMDGPU::AReg_64RegClassID; 2345 case 96: 2346 return AMDGPU::AReg_96RegClassID; 2347 case 128: 2348 return AMDGPU::AReg_128RegClassID; 2349 case 160: 2350 return AMDGPU::AReg_160RegClassID; 2351 case 192: 2352 return AMDGPU::AReg_192RegClassID; 2353 case 224: 2354 return AMDGPU::AReg_224RegClassID; 2355 case 256: 2356 return AMDGPU::AReg_256RegClassID; 2357 case 512: 2358 return AMDGPU::AReg_512RegClassID; 
2359 case 1024: 2360 return AMDGPU::AReg_1024RegClassID; 2361 } 2362 } 2363 return -1; 2364 } 2365 2366 static unsigned getSpecialRegForName(StringRef RegName) { 2367 return StringSwitch<unsigned>(RegName) 2368 .Case("exec", AMDGPU::EXEC) 2369 .Case("vcc", AMDGPU::VCC) 2370 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2371 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2372 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2373 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2374 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2375 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2376 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2377 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2378 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2379 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2380 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2381 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2382 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2383 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2384 .Case("m0", AMDGPU::M0) 2385 .Case("vccz", AMDGPU::SRC_VCCZ) 2386 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2387 .Case("execz", AMDGPU::SRC_EXECZ) 2388 .Case("src_execz", AMDGPU::SRC_EXECZ) 2389 .Case("scc", AMDGPU::SRC_SCC) 2390 .Case("src_scc", AMDGPU::SRC_SCC) 2391 .Case("tba", AMDGPU::TBA) 2392 .Case("tma", AMDGPU::TMA) 2393 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2394 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2395 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2396 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2397 .Case("vcc_lo", AMDGPU::VCC_LO) 2398 .Case("vcc_hi", AMDGPU::VCC_HI) 2399 .Case("exec_lo", AMDGPU::EXEC_LO) 2400 .Case("exec_hi", AMDGPU::EXEC_HI) 2401 .Case("tma_lo", AMDGPU::TMA_LO) 2402 .Case("tma_hi", AMDGPU::TMA_HI) 2403 .Case("tba_lo", AMDGPU::TBA_LO) 2404 .Case("tba_hi", AMDGPU::TBA_HI) 2405 .Case("pc", AMDGPU::PC_REG) 2406 .Case("null", AMDGPU::SGPR_NULL) 2407 .Default(AMDGPU::NoRegister); 2408 } 2409 2410 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2411 SMLoc &EndLoc, bool RestoreOnFailure) { 2412 auto R = parseRegister(); 2413 if (!R) return true; 2414 assert(R->isReg()); 2415 RegNo = R->getReg(); 2416 StartLoc = R->getStartLoc(); 2417 EndLoc = R->getEndLoc(); 2418 return false; 2419 } 2420 2421 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2422 SMLoc &EndLoc) { 2423 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2424 } 2425 2426 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2427 SMLoc &StartLoc, 2428 SMLoc &EndLoc) { 2429 bool Result = 2430 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2431 bool PendingErrors = getParser().hasPendingError(); 2432 getParser().clearPendingErrors(); 2433 if (PendingErrors) 2434 return MatchOperand_ParseFail; 2435 if (Result) 2436 return MatchOperand_NoMatch; 2437 return MatchOperand_Success; 2438 } 2439 2440 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2441 RegisterKind RegKind, unsigned Reg1, 2442 SMLoc Loc) { 2443 switch (RegKind) { 2444 case IS_SPECIAL: 2445 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2446 Reg = AMDGPU::EXEC; 2447 RegWidth = 64; 2448 return true; 2449 } 2450 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2451 Reg = AMDGPU::FLAT_SCR; 2452 RegWidth = 64; 2453 return true; 2454 } 2455 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2456 Reg = AMDGPU::XNACK_MASK; 2457 RegWidth = 64; 
2458 return true; 2459 } 2460 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2461 Reg = AMDGPU::VCC; 2462 RegWidth = 64; 2463 return true; 2464 } 2465 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2466 Reg = AMDGPU::TBA; 2467 RegWidth = 64; 2468 return true; 2469 } 2470 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2471 Reg = AMDGPU::TMA; 2472 RegWidth = 64; 2473 return true; 2474 } 2475 Error(Loc, "register does not fit in the list"); 2476 return false; 2477 case IS_VGPR: 2478 case IS_SGPR: 2479 case IS_AGPR: 2480 case IS_TTMP: 2481 if (Reg1 != Reg + RegWidth / 32) { 2482 Error(Loc, "registers in a list must have consecutive indices"); 2483 return false; 2484 } 2485 RegWidth += 32; 2486 return true; 2487 default: 2488 llvm_unreachable("unexpected register kind"); 2489 } 2490 } 2491 2492 struct RegInfo { 2493 StringLiteral Name; 2494 RegisterKind Kind; 2495 }; 2496 2497 static constexpr RegInfo RegularRegisters[] = { 2498 {{"v"}, IS_VGPR}, 2499 {{"s"}, IS_SGPR}, 2500 {{"ttmp"}, IS_TTMP}, 2501 {{"acc"}, IS_AGPR}, 2502 {{"a"}, IS_AGPR}, 2503 }; 2504 2505 static bool isRegularReg(RegisterKind Kind) { 2506 return Kind == IS_VGPR || 2507 Kind == IS_SGPR || 2508 Kind == IS_TTMP || 2509 Kind == IS_AGPR; 2510 } 2511 2512 static const RegInfo* getRegularRegInfo(StringRef Str) { 2513 for (const RegInfo &Reg : RegularRegisters) 2514 if (Str.startswith(Reg.Name)) 2515 return &Reg; 2516 return nullptr; 2517 } 2518 2519 static bool getRegNum(StringRef Str, unsigned& Num) { 2520 return !Str.getAsInteger(10, Num); 2521 } 2522 2523 bool 2524 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2525 const AsmToken &NextToken) const { 2526 2527 // A list of consecutive registers: [s0,s1,s2,s3] 2528 if (Token.is(AsmToken::LBrac)) 2529 return true; 2530 2531 if (!Token.is(AsmToken::Identifier)) 2532 return false; 2533 2534 // A single register like s0 or a range of registers like s[0:1] 2535 2536 StringRef Str = Token.getString(); 2537 const RegInfo *Reg = getRegularRegInfo(Str); 2538 if (Reg) { 2539 StringRef RegName = Reg->Name; 2540 StringRef RegSuffix = Str.substr(RegName.size()); 2541 if (!RegSuffix.empty()) { 2542 unsigned Num; 2543 // A single register with an index: rXX 2544 if (getRegNum(RegSuffix, Num)) 2545 return true; 2546 } else { 2547 // A range of registers: r[XX:YY]. 2548 if (NextToken.is(AsmToken::LBrac)) 2549 return true; 2550 } 2551 } 2552 2553 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2554 } 2555 2556 bool 2557 AMDGPUAsmParser::isRegister() 2558 { 2559 return isRegister(getToken(), peekToken()); 2560 } 2561 2562 unsigned 2563 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2564 unsigned RegNum, 2565 unsigned RegWidth, 2566 SMLoc Loc) { 2567 2568 assert(isRegularReg(RegKind)); 2569 2570 unsigned AlignSize = 1; 2571 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2572 // SGPR and TTMP registers must be aligned. 2573 // Max required alignment is 4 dwords. 
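    // For example, a 64-bit pair such as s[1:2] is rejected because it does
    // not start at an even index, while a 256-bit tuple such as s[4:11] is
    // accepted since the required alignment is capped at 4 dwords.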
2574 AlignSize = std::min(RegWidth / 32, 4u); 2575 } 2576 2577 if (RegNum % AlignSize != 0) { 2578 Error(Loc, "invalid register alignment"); 2579 return AMDGPU::NoRegister; 2580 } 2581 2582 unsigned RegIdx = RegNum / AlignSize; 2583 int RCID = getRegClass(RegKind, RegWidth); 2584 if (RCID == -1) { 2585 Error(Loc, "invalid or unsupported register size"); 2586 return AMDGPU::NoRegister; 2587 } 2588 2589 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2590 const MCRegisterClass RC = TRI->getRegClass(RCID); 2591 if (RegIdx >= RC.getNumRegs()) { 2592 Error(Loc, "register index is out of range"); 2593 return AMDGPU::NoRegister; 2594 } 2595 2596 return RC.getRegister(RegIdx); 2597 } 2598 2599 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2600 int64_t RegLo, RegHi; 2601 if (!skipToken(AsmToken::LBrac, "missing register index")) 2602 return false; 2603 2604 SMLoc FirstIdxLoc = getLoc(); 2605 SMLoc SecondIdxLoc; 2606 2607 if (!parseExpr(RegLo)) 2608 return false; 2609 2610 if (trySkipToken(AsmToken::Colon)) { 2611 SecondIdxLoc = getLoc(); 2612 if (!parseExpr(RegHi)) 2613 return false; 2614 } else { 2615 RegHi = RegLo; 2616 } 2617 2618 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2619 return false; 2620 2621 if (!isUInt<32>(RegLo)) { 2622 Error(FirstIdxLoc, "invalid register index"); 2623 return false; 2624 } 2625 2626 if (!isUInt<32>(RegHi)) { 2627 Error(SecondIdxLoc, "invalid register index"); 2628 return false; 2629 } 2630 2631 if (RegLo > RegHi) { 2632 Error(FirstIdxLoc, "first register index should not exceed second index"); 2633 return false; 2634 } 2635 2636 Num = static_cast<unsigned>(RegLo); 2637 RegWidth = 32 * ((RegHi - RegLo) + 1); 2638 return true; 2639 } 2640 2641 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2642 unsigned &RegNum, unsigned &RegWidth, 2643 SmallVectorImpl<AsmToken> &Tokens) { 2644 assert(isToken(AsmToken::Identifier)); 2645 unsigned Reg = getSpecialRegForName(getTokenStr()); 2646 if (Reg) { 2647 RegNum = 0; 2648 RegWidth = 32; 2649 RegKind = IS_SPECIAL; 2650 Tokens.push_back(getToken()); 2651 lex(); // skip register name 2652 } 2653 return Reg; 2654 } 2655 2656 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2657 unsigned &RegNum, unsigned &RegWidth, 2658 SmallVectorImpl<AsmToken> &Tokens) { 2659 assert(isToken(AsmToken::Identifier)); 2660 StringRef RegName = getTokenStr(); 2661 auto Loc = getLoc(); 2662 2663 const RegInfo *RI = getRegularRegInfo(RegName); 2664 if (!RI) { 2665 Error(Loc, "invalid register name"); 2666 return AMDGPU::NoRegister; 2667 } 2668 2669 Tokens.push_back(getToken()); 2670 lex(); // skip register name 2671 2672 RegKind = RI->Kind; 2673 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2674 if (!RegSuffix.empty()) { 2675 // Single 32-bit register: vXX. 2676 if (!getRegNum(RegSuffix, RegNum)) { 2677 Error(Loc, "invalid register index"); 2678 return AMDGPU::NoRegister; 2679 } 2680 RegWidth = 32; 2681 } else { 2682 // Range of registers: v[XX:YY]. ":YY" is optional. 
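    // For example, v[8:11] yields RegNum = 8 with RegWidth = 128, while
    // v[8] (no ":YY") yields RegNum = 8 with RegWidth = 32.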
2683 if (!ParseRegRange(RegNum, RegWidth)) 2684 return AMDGPU::NoRegister; 2685 } 2686 2687 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2688 } 2689 2690 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2691 unsigned &RegWidth, 2692 SmallVectorImpl<AsmToken> &Tokens) { 2693 unsigned Reg = AMDGPU::NoRegister; 2694 auto ListLoc = getLoc(); 2695 2696 if (!skipToken(AsmToken::LBrac, 2697 "expected a register or a list of registers")) { 2698 return AMDGPU::NoRegister; 2699 } 2700 2701 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2702 2703 auto Loc = getLoc(); 2704 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2705 return AMDGPU::NoRegister; 2706 if (RegWidth != 32) { 2707 Error(Loc, "expected a single 32-bit register"); 2708 return AMDGPU::NoRegister; 2709 } 2710 2711 for (; trySkipToken(AsmToken::Comma); ) { 2712 RegisterKind NextRegKind; 2713 unsigned NextReg, NextRegNum, NextRegWidth; 2714 Loc = getLoc(); 2715 2716 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2717 NextRegNum, NextRegWidth, 2718 Tokens)) { 2719 return AMDGPU::NoRegister; 2720 } 2721 if (NextRegWidth != 32) { 2722 Error(Loc, "expected a single 32-bit register"); 2723 return AMDGPU::NoRegister; 2724 } 2725 if (NextRegKind != RegKind) { 2726 Error(Loc, "registers in a list must be of the same kind"); 2727 return AMDGPU::NoRegister; 2728 } 2729 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2730 return AMDGPU::NoRegister; 2731 } 2732 2733 if (!skipToken(AsmToken::RBrac, 2734 "expected a comma or a closing square bracket")) { 2735 return AMDGPU::NoRegister; 2736 } 2737 2738 if (isRegularReg(RegKind)) 2739 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2740 2741 return Reg; 2742 } 2743 2744 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2745 unsigned &RegNum, unsigned &RegWidth, 2746 SmallVectorImpl<AsmToken> &Tokens) { 2747 auto Loc = getLoc(); 2748 Reg = AMDGPU::NoRegister; 2749 2750 if (isToken(AsmToken::Identifier)) { 2751 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2752 if (Reg == AMDGPU::NoRegister) 2753 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2754 } else { 2755 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2756 } 2757 2758 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2759 if (Reg == AMDGPU::NoRegister) { 2760 assert(Parser.hasPendingError()); 2761 return false; 2762 } 2763 2764 if (!subtargetHasRegister(*TRI, Reg)) { 2765 if (Reg == AMDGPU::SGPR_NULL) { 2766 Error(Loc, "'null' operand is not supported on this GPU"); 2767 } else { 2768 Error(Loc, "register not available on this GPU"); 2769 } 2770 return false; 2771 } 2772 2773 return true; 2774 } 2775 2776 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2777 unsigned &RegNum, unsigned &RegWidth, 2778 bool RestoreOnFailure /*=false*/) { 2779 Reg = AMDGPU::NoRegister; 2780 2781 SmallVector<AsmToken, 1> Tokens; 2782 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2783 if (RestoreOnFailure) { 2784 while (!Tokens.empty()) { 2785 getLexer().UnLex(Tokens.pop_back_val()); 2786 } 2787 } 2788 return true; 2789 } 2790 return false; 2791 } 2792 2793 Optional<StringRef> 2794 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2795 switch (RegKind) { 2796 case IS_VGPR: 2797 return StringRef(".amdgcn.next_free_vgpr"); 2798 case IS_SGPR: 2799 return StringRef(".amdgcn.next_free_sgpr"); 2800 default: 2801 return None; 2802 } 2803 } 2804 2805 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2806 auto SymbolName = getGprCountSymbolName(RegKind); 2807 assert(SymbolName && "initializing invalid register kind"); 2808 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2809 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2810 } 2811 2812 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2813 unsigned DwordRegIndex, 2814 unsigned RegWidth) { 2815 // Symbols are only defined for GCN targets 2816 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2817 return true; 2818 2819 auto SymbolName = getGprCountSymbolName(RegKind); 2820 if (!SymbolName) 2821 return true; 2822 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2823 2824 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2825 int64_t OldCount; 2826 2827 if (!Sym->isVariable()) 2828 return !Error(getLoc(), 2829 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2830 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2831 return !Error( 2832 getLoc(), 2833 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2834 2835 if (OldCount <= NewMax) 2836 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2837 2838 return true; 2839 } 2840 2841 std::unique_ptr<AMDGPUOperand> 2842 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2843 const auto &Tok = getToken(); 2844 SMLoc StartLoc = Tok.getLoc(); 2845 SMLoc EndLoc = Tok.getEndLoc(); 2846 RegisterKind RegKind; 2847 unsigned Reg, RegNum, RegWidth; 2848 2849 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2850 return nullptr; 2851 } 2852 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2853 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2854 return nullptr; 2855 } else 2856 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2857 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2858 } 2859 2860 OperandMatchResultTy 2861 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2862 // TODO: add syntactic sugar for 1/(2*PI) 2863 2864 assert(!isRegister()); 2865 assert(!isModifier()); 2866 2867 const auto& Tok = getToken(); 2868 const auto& NextTok = peekToken(); 2869 bool IsReal = Tok.is(AsmToken::Real); 2870 SMLoc S = getLoc(); 2871 bool Negate = false; 2872 2873 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2874 lex(); 2875 IsReal = true; 2876 Negate = true; 2877 } 2878 2879 if (IsReal) { 2880 // Floating-point expressions are not supported. 2881 // Can only allow floating-point literals with an 2882 // optional sign. 2883 2884 StringRef Num = getTokenStr(); 2885 lex(); 2886 2887 APFloat RealVal(APFloat::IEEEdouble()); 2888 auto roundMode = APFloat::rmNearestTiesToEven; 2889 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2890 return MatchOperand_ParseFail; 2891 } 2892 if (Negate) 2893 RealVal.changeSign(); 2894 2895 Operands.push_back( 2896 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2897 AMDGPUOperand::ImmTyNone, true)); 2898 2899 return MatchOperand_Success; 2900 2901 } else { 2902 int64_t IntVal; 2903 const MCExpr *Expr; 2904 SMLoc S = getLoc(); 2905 2906 if (HasSP3AbsModifier) { 2907 // This is a workaround for handling expressions 2908 // as arguments of SP3 'abs' modifier, for example: 2909 // |1.0| 2910 // |-1| 2911 // |1+x| 2912 // This syntax is not compatible with syntax of standard 2913 // MC expressions (due to the trailing '|'). 
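      // Parsing only a primary expression here leaves the trailing '|' to be
      // consumed by the caller; a full parseExpression() would instead try to
      // treat the '|' as a binary operator.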
2914 SMLoc EndLoc; 2915 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2916 return MatchOperand_ParseFail; 2917 } else { 2918 if (Parser.parseExpression(Expr)) 2919 return MatchOperand_ParseFail; 2920 } 2921 2922 if (Expr->evaluateAsAbsolute(IntVal)) { 2923 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2924 } else { 2925 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2926 } 2927 2928 return MatchOperand_Success; 2929 } 2930 2931 return MatchOperand_NoMatch; 2932 } 2933 2934 OperandMatchResultTy 2935 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2936 if (!isRegister()) 2937 return MatchOperand_NoMatch; 2938 2939 if (auto R = parseRegister()) { 2940 assert(R->isReg()); 2941 Operands.push_back(std::move(R)); 2942 return MatchOperand_Success; 2943 } 2944 return MatchOperand_ParseFail; 2945 } 2946 2947 OperandMatchResultTy 2948 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2949 auto res = parseReg(Operands); 2950 if (res != MatchOperand_NoMatch) { 2951 return res; 2952 } else if (isModifier()) { 2953 return MatchOperand_NoMatch; 2954 } else { 2955 return parseImm(Operands, HasSP3AbsMod); 2956 } 2957 } 2958 2959 bool 2960 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2961 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2962 const auto &str = Token.getString(); 2963 return str == "abs" || str == "neg" || str == "sext"; 2964 } 2965 return false; 2966 } 2967 2968 bool 2969 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2970 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2971 } 2972 2973 bool 2974 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2975 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2976 } 2977 2978 bool 2979 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2980 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2981 } 2982 2983 // Check if this is an operand modifier or an opcode modifier 2984 // which may look like an expression but it is not. We should 2985 // avoid parsing these modifiers as expressions. Currently 2986 // recognized sequences are: 2987 // |...| 2988 // abs(...) 2989 // neg(...) 2990 // sext(...) 2991 // -reg 2992 // -|...| 2993 // -abs(...) 2994 // name:... 2995 // Note that simple opcode modifiers like 'gds' may be parsed as 2996 // expressions; this is a special case. See getExpressionAsToken. 2997 // 2998 bool 2999 AMDGPUAsmParser::isModifier() { 3000 3001 AsmToken Tok = getToken(); 3002 AsmToken NextToken[2]; 3003 peekTokens(NextToken); 3004 3005 return isOperandModifier(Tok, NextToken[0]) || 3006 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3007 isOpcodeModifierWithVal(Tok, NextToken[0]); 3008 } 3009 3010 // Check if the current token is an SP3 'neg' modifier. 3011 // Currently this modifier is allowed in the following context: 3012 // 3013 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3014 // 2. Before an 'abs' modifier: -abs(...) 3015 // 3. Before an SP3 'abs' modifier: -|...| 3016 // 3017 // In all other cases "-" is handled as a part 3018 // of an expression that follows the sign. 
3019 //
3020 // Note: When "-" is followed by an integer literal,
3021 // this is interpreted as integer negation rather
3022 // than a floating-point NEG modifier applied to the literal.
3023 // Besides being counter-intuitive, such use of a floating-point
3024 // NEG modifier would have resulted in a different meaning
3025 // of integer literals used with VOP1/2/C and VOP3,
3026 // for example:
3027 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3028 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3029 // Negative fp literals with a preceding "-" are
3030 // handled likewise for uniformity.
3031 //
3032 bool
3033 AMDGPUAsmParser::parseSP3NegModifier() {
3034 
3035   AsmToken NextToken[2];
3036   peekTokens(NextToken);
3037 
3038   if (isToken(AsmToken::Minus) &&
3039       (isRegister(NextToken[0], NextToken[1]) ||
3040        NextToken[0].is(AsmToken::Pipe) ||
3041        isId(NextToken[0], "abs"))) {
3042     lex();
3043     return true;
3044   }
3045 
3046   return false;
3047 }
3048 
3049 OperandMatchResultTy
3050 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3051                                               bool AllowImm) {
3052   bool Neg, SP3Neg;
3053   bool Abs, SP3Abs;
3054   SMLoc Loc;
3055 
3056   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3057   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3058     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3059     return MatchOperand_ParseFail;
3060   }
3061 
3062   SP3Neg = parseSP3NegModifier();
3063 
3064   Loc = getLoc();
3065   Neg = trySkipId("neg");
3066   if (Neg && SP3Neg) {
3067     Error(Loc, "expected register or immediate");
3068     return MatchOperand_ParseFail;
3069   }
3070   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3071     return MatchOperand_ParseFail;
3072 
3073   Abs = trySkipId("abs");
3074   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3075     return MatchOperand_ParseFail;
3076 
3077   Loc = getLoc();
3078   SP3Abs = trySkipToken(AsmToken::Pipe);
3079   if (Abs && SP3Abs) {
3080     Error(Loc, "expected register or immediate");
3081     return MatchOperand_ParseFail;
3082   }
3083 
3084   OperandMatchResultTy Res;
3085   if (AllowImm) {
3086     Res = parseRegOrImm(Operands, SP3Abs);
3087   } else {
3088     Res = parseReg(Operands);
3089   }
3090   if (Res != MatchOperand_Success) {
3091     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3092 } 3093 3094 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3095 return MatchOperand_ParseFail; 3096 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3097 return MatchOperand_ParseFail; 3098 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3099 return MatchOperand_ParseFail; 3100 3101 AMDGPUOperand::Modifiers Mods; 3102 Mods.Abs = Abs || SP3Abs; 3103 Mods.Neg = Neg || SP3Neg; 3104 3105 if (Mods.hasFPModifiers()) { 3106 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3107 if (Op.isExpr()) { 3108 Error(Op.getStartLoc(), "expected an absolute expression"); 3109 return MatchOperand_ParseFail; 3110 } 3111 Op.setModifiers(Mods); 3112 } 3113 return MatchOperand_Success; 3114 } 3115 3116 OperandMatchResultTy 3117 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3118 bool AllowImm) { 3119 bool Sext = trySkipId("sext"); 3120 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3121 return MatchOperand_ParseFail; 3122 3123 OperandMatchResultTy Res; 3124 if (AllowImm) { 3125 Res = parseRegOrImm(Operands); 3126 } else { 3127 Res = parseReg(Operands); 3128 } 3129 if (Res != MatchOperand_Success) { 3130 return Sext? MatchOperand_ParseFail : Res; 3131 } 3132 3133 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3134 return MatchOperand_ParseFail; 3135 3136 AMDGPUOperand::Modifiers Mods; 3137 Mods.Sext = Sext; 3138 3139 if (Mods.hasIntModifiers()) { 3140 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3141 if (Op.isExpr()) { 3142 Error(Op.getStartLoc(), "expected an absolute expression"); 3143 return MatchOperand_ParseFail; 3144 } 3145 Op.setModifiers(Mods); 3146 } 3147 3148 return MatchOperand_Success; 3149 } 3150 3151 OperandMatchResultTy 3152 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3153 return parseRegOrImmWithFPInputMods(Operands, false); 3154 } 3155 3156 OperandMatchResultTy 3157 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3158 return parseRegOrImmWithIntInputMods(Operands, false); 3159 } 3160 3161 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3162 auto Loc = getLoc(); 3163 if (trySkipId("off")) { 3164 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3165 AMDGPUOperand::ImmTyOff, false)); 3166 return MatchOperand_Success; 3167 } 3168 3169 if (!isRegister()) 3170 return MatchOperand_NoMatch; 3171 3172 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3173 if (Reg) { 3174 Operands.push_back(std::move(Reg)); 3175 return MatchOperand_Success; 3176 } 3177 3178 return MatchOperand_ParseFail; 3179 3180 } 3181 3182 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3183 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3184 3185 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3186 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3187 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3188 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3189 return Match_InvalidOperand; 3190 3191 if ((TSFlags & SIInstrFlags::VOP3) && 3192 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3193 getForcedEncodingSize() != 64) 3194 return Match_PreferE32; 3195 3196 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3197 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3198 // v_mac_f32/16 allow only dst_sel == DWORD; 3199 auto OpNum = 3200 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3201     const auto &Op = Inst.getOperand(OpNum);
3202     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3203       return Match_InvalidOperand;
3204     }
3205   }
3206 
3207   return Match_Success;
3208 }
3209 
3210 static ArrayRef<unsigned> getAllVariants() {
3211   static const unsigned Variants[] = {
3212     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3213     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3214   };
3215 
3216   return makeArrayRef(Variants);
3217 }
3218 
3219 // Which asm variants we should check.
3220 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3221   if (getForcedEncodingSize() == 32) {
3222     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3223     return makeArrayRef(Variants);
3224   }
3225 
3226   if (isForcedVOP3()) {
3227     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3228     return makeArrayRef(Variants);
3229   }
3230 
3231   if (isForcedSDWA()) {
3232     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3233                                         AMDGPUAsmVariants::SDWA9};
3234     return makeArrayRef(Variants);
3235   }
3236 
3237   if (isForcedDPP()) {
3238     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3239     return makeArrayRef(Variants);
3240   }
3241 
3242   return getAllVariants();
3243 }
3244 
3245 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3246   if (getForcedEncodingSize() == 32)
3247     return "e32";
3248 
3249   if (isForcedVOP3())
3250     return "e64";
3251 
3252   if (isForcedSDWA())
3253     return "sdwa";
3254 
3255   if (isForcedDPP())
3256     return "dpp";
3257 
3258   return "";
3259 }
3260 
3261 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3262   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3263   const unsigned Num = Desc.getNumImplicitUses();
3264   for (unsigned i = 0; i < Num; ++i) {
3265     unsigned Reg = Desc.ImplicitUses[i];
3266     switch (Reg) {
3267     case AMDGPU::FLAT_SCR:
3268     case AMDGPU::VCC:
3269     case AMDGPU::VCC_LO:
3270     case AMDGPU::VCC_HI:
3271     case AMDGPU::M0:
3272       return Reg;
3273     default:
3274       break;
3275     }
3276   }
3277   return AMDGPU::NoRegister;
3278 }
3279 
3280 // NB: This code is correct only when used to check constant
3281 // bus limitations because GFX7 supports no f16 inline constants.
3282 // Note that there are no cases when a GFX7 opcode violates
3283 // constant bus limitations due to the use of an f16 constant.
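// Illustrative dispatch by expected operand size (see the switch below):
// 8-byte operands use the 64-bit inline range, 4-byte operands use the
// 32-bit range, and 2-byte operands use one of the 16-bit or packed
// 16-bit helpers depending on the exact operand type.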
3284 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3285 unsigned OpIdx) const { 3286 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3287 3288 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3289 return false; 3290 } 3291 3292 const MCOperand &MO = Inst.getOperand(OpIdx); 3293 3294 int64_t Val = MO.getImm(); 3295 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3296 3297 switch (OpSize) { // expected operand size 3298 case 8: 3299 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3300 case 4: 3301 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3302 case 2: { 3303 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3304 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3305 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3306 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3307 return AMDGPU::isInlinableIntLiteral(Val); 3308 3309 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3310 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3311 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3312 return AMDGPU::isInlinableIntLiteralV216(Val); 3313 3314 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3315 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3316 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3317 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3318 3319 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3320 } 3321 default: 3322 llvm_unreachable("invalid operand size"); 3323 } 3324 } 3325 3326 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3327 if (!isGFX10Plus()) 3328 return 1; 3329 3330 switch (Opcode) { 3331 // 64-bit shift instructions can use only one scalar value input 3332 case AMDGPU::V_LSHLREV_B64_e64: 3333 case AMDGPU::V_LSHLREV_B64_gfx10: 3334 case AMDGPU::V_LSHRREV_B64_e64: 3335 case AMDGPU::V_LSHRREV_B64_gfx10: 3336 case AMDGPU::V_ASHRREV_I64_e64: 3337 case AMDGPU::V_ASHRREV_I64_gfx10: 3338 case AMDGPU::V_LSHL_B64_e64: 3339 case AMDGPU::V_LSHR_B64_e64: 3340 case AMDGPU::V_ASHR_I64_e64: 3341 return 1; 3342 default: 3343 return 2; 3344 } 3345 } 3346 3347 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3348 const MCOperand &MO = Inst.getOperand(OpIdx); 3349 if (MO.isImm()) { 3350 return !isInlineConstant(Inst, OpIdx); 3351 } else if (MO.isReg()) { 3352 auto Reg = MO.getReg(); 3353 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3354 auto PReg = mc2PseudoReg(Reg); 3355 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3356 } else { 3357 return true; 3358 } 3359 } 3360 3361 bool 3362 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3363 const OperandVector &Operands) { 3364 const unsigned Opcode = Inst.getOpcode(); 3365 const MCInstrDesc &Desc = MII.get(Opcode); 3366 unsigned LastSGPR = AMDGPU::NoRegister; 3367 unsigned ConstantBusUseCount = 0; 3368 unsigned NumLiterals = 0; 3369 unsigned LiteralSize; 3370 3371 if (Desc.TSFlags & 3372 (SIInstrFlags::VOPC | 3373 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3374 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3375 SIInstrFlags::SDWA)) { 3376 // Check special imm operands (used by madmk, etc) 3377 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3378 ++NumLiterals; 3379 LiteralSize = 4; 3380 } 3381 3382 SmallDenseSet<unsigned> SGPRsUsed; 3383 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3384 if (SGPRUsed != AMDGPU::NoRegister) { 3385 SGPRsUsed.insert(SGPRUsed); 3386 ++ConstantBusUseCount; 3387 } 3388 3389 
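    // Count the explicit source operands: each distinct SGPR and each
    // literal value contributes to the constant bus usage.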
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3390     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3391     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3392 
3393     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3394 
3395     for (int OpIdx : OpIndices) {
3396       if (OpIdx == -1) break;
3397 
3398       const MCOperand &MO = Inst.getOperand(OpIdx);
3399       if (usesConstantBus(Inst, OpIdx)) {
3400         if (MO.isReg()) {
3401           LastSGPR = mc2PseudoReg(MO.getReg());
3402           // Pairs of registers with a partial intersection like these:
3403           //   s0, s[0:1]
3404           //   flat_scratch_lo, flat_scratch
3405           //   flat_scratch_lo, flat_scratch_hi
3406           // are theoretically valid but they are disabled anyway.
3407           // Note that this code mimics SIInstrInfo::verifyInstruction.
3408           if (!SGPRsUsed.count(LastSGPR)) {
3409             SGPRsUsed.insert(LastSGPR);
3410             ++ConstantBusUseCount;
3411           }
3412         } else { // Expression or a literal
3413 
3414           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3415             continue; // special operand like VINTERP attr_chan
3416 
3417           // An instruction may use only one literal.
3418           // This has been validated in the previous step.
3419           // See validateVOPLiteral.
3420           // This literal may be used as more than one operand.
3421           // If all these operands are of the same size,
3422           // this literal counts as one scalar value.
3423           // Otherwise it counts as 2 scalar values.
3424           // See "GFX10 Shader Programming", section 3.6.2.3.
3425 
3426           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3427           if (Size < 4) Size = 4;
3428 
3429           if (NumLiterals == 0) {
3430             NumLiterals = 1;
3431             LiteralSize = Size;
3432           } else if (LiteralSize != Size) {
3433             NumLiterals = 2;
3434           }
3435         }
3436       }
3437     }
3438   }
3439   ConstantBusUseCount += NumLiterals;
3440 
3441   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3442     return true;
3443 
3444   SMLoc LitLoc = getLitLoc(Operands);
3445   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3446   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3447 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3448 return false; 3449 } 3450 3451 bool 3452 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3453 const OperandVector &Operands) { 3454 const unsigned Opcode = Inst.getOpcode(); 3455 const MCInstrDesc &Desc = MII.get(Opcode); 3456 3457 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3458 if (DstIdx == -1 || 3459 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3460 return true; 3461 } 3462 3463 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3464 3465 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3466 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3467 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3468 3469 assert(DstIdx != -1); 3470 const MCOperand &Dst = Inst.getOperand(DstIdx); 3471 assert(Dst.isReg()); 3472 3473 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3474 3475 for (int SrcIdx : SrcIndices) { 3476 if (SrcIdx == -1) break; 3477 const MCOperand &Src = Inst.getOperand(SrcIdx); 3478 if (Src.isReg()) { 3479 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3480 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3481 Error(getRegLoc(SrcReg, Operands), 3482 "destination must be different than all sources"); 3483 return false; 3484 } 3485 } 3486 } 3487 3488 return true; 3489 } 3490 3491 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3492 3493 const unsigned Opc = Inst.getOpcode(); 3494 const MCInstrDesc &Desc = MII.get(Opc); 3495 3496 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3497 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3498 assert(ClampIdx != -1); 3499 return Inst.getOperand(ClampIdx).getImm() == 0; 3500 } 3501 3502 return true; 3503 } 3504 3505 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3506 3507 const unsigned Opc = Inst.getOpcode(); 3508 const MCInstrDesc &Desc = MII.get(Opc); 3509 3510 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3511 return None; 3512 3513 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3514 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3515 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3516 3517 assert(VDataIdx != -1); 3518 3519 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3520 return None; 3521 3522 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3523 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3524 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3525 if (DMask == 0) 3526 DMask = 1; 3527 3528 bool isPackedD16 = false; 3529 unsigned DataSize = 3530 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3531 if (hasPackedD16()) { 3532 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3533 isPackedD16 = D16Idx >= 0; 3534 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3535 DataSize = (DataSize + 1) / 2; 3536 } 3537 3538 if ((VDataSize / 4) == DataSize + TFESize) 3539 return None; 3540 3541 return StringRef(isPackedD16 3542 ? 
"image data size does not match dmask, d16 and tfe" 3543 : "image data size does not match dmask and tfe"); 3544 } 3545 3546 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3547 const unsigned Opc = Inst.getOpcode(); 3548 const MCInstrDesc &Desc = MII.get(Opc); 3549 3550 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3551 return true; 3552 3553 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3554 3555 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3556 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3557 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3558 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3559 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3560 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3561 3562 assert(VAddr0Idx != -1); 3563 assert(SrsrcIdx != -1); 3564 assert(SrsrcIdx > VAddr0Idx); 3565 3566 if (DimIdx == -1) 3567 return true; // intersect_ray 3568 3569 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3570 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3571 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3572 unsigned ActualAddrSize = 3573 IsNSA ? SrsrcIdx - VAddr0Idx 3574 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3575 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3576 3577 unsigned ExpectedAddrSize = 3578 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3579 3580 if (!IsNSA) { 3581 if (ExpectedAddrSize > 8) 3582 ExpectedAddrSize = 16; 3583 3584 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3585 // This provides backward compatibility for assembly created 3586 // before 160b/192b/224b types were directly supported. 3587 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3588 return true; 3589 } 3590 3591 return ActualAddrSize == ExpectedAddrSize; 3592 } 3593 3594 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3595 3596 const unsigned Opc = Inst.getOpcode(); 3597 const MCInstrDesc &Desc = MII.get(Opc); 3598 3599 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3600 return true; 3601 if (!Desc.mayLoad() || !Desc.mayStore()) 3602 return true; // Not atomic 3603 3604 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3605 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3606 3607 // This is an incomplete check because image_atomic_cmpswap 3608 // may only use 0x3 and 0xf while other atomic operations 3609 // may use 0x1 and 0x3. However these limitations are 3610 // verified when we check that dmask matches dst size. 3611 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3612 } 3613 3614 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3615 3616 const unsigned Opc = Inst.getOpcode(); 3617 const MCInstrDesc &Desc = MII.get(Opc); 3618 3619 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3620 return true; 3621 3622 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3623 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3624 3625 // GATHER4 instructions use dmask in a different fashion compared to 3626 // other MIMG instructions. The only useful DMASK values are 3627 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3628 // (red,red,red,red) etc.) The ISA document doesn't mention 3629 // this. 
3630 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3631 } 3632 3633 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3634 const unsigned Opc = Inst.getOpcode(); 3635 const MCInstrDesc &Desc = MII.get(Opc); 3636 3637 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3638 return true; 3639 3640 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3641 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3642 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3643 3644 if (!BaseOpcode->MSAA) 3645 return true; 3646 3647 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3648 assert(DimIdx != -1); 3649 3650 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3651 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3652 3653 return DimInfo->MSAA; 3654 } 3655 3656 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3657 { 3658 switch (Opcode) { 3659 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3660 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3661 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3662 return true; 3663 default: 3664 return false; 3665 } 3666 } 3667 3668 // movrels* opcodes should only allow VGPRS as src0. 3669 // This is specified in .td description for vop1/vop3, 3670 // but sdwa is handled differently. See isSDWAOperand. 3671 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3672 const OperandVector &Operands) { 3673 3674 const unsigned Opc = Inst.getOpcode(); 3675 const MCInstrDesc &Desc = MII.get(Opc); 3676 3677 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3678 return true; 3679 3680 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3681 assert(Src0Idx != -1); 3682 3683 SMLoc ErrLoc; 3684 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3685 if (Src0.isReg()) { 3686 auto Reg = mc2PseudoReg(Src0.getReg()); 3687 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3688 if (!isSGPR(Reg, TRI)) 3689 return true; 3690 ErrLoc = getRegLoc(Reg, Operands); 3691 } else { 3692 ErrLoc = getConstLoc(Operands); 3693 } 3694 3695 Error(ErrLoc, "source operand must be a VGPR"); 3696 return false; 3697 } 3698 3699 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3700 const OperandVector &Operands) { 3701 3702 const unsigned Opc = Inst.getOpcode(); 3703 3704 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3705 return true; 3706 3707 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3708 assert(Src0Idx != -1); 3709 3710 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3711 if (!Src0.isReg()) 3712 return true; 3713 3714 auto Reg = mc2PseudoReg(Src0.getReg()); 3715 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3716 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3717 Error(getRegLoc(Reg, Operands), 3718 "source operand must be either a VGPR or an inline constant"); 3719 return false; 3720 } 3721 3722 return true; 3723 } 3724 3725 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3726 const OperandVector &Operands) { 3727 const unsigned Opc = Inst.getOpcode(); 3728 const MCInstrDesc &Desc = MII.get(Opc); 3729 3730 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3731 return true; 3732 3733 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3734 if (Src2Idx == -1) 3735 return true; 3736 3737 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3738 if (!Src2.isReg()) 3739 return true; 3740 3741 MCRegister Src2Reg = Src2.getReg(); 3742 MCRegister DstReg = Inst.getOperand(0).getReg(); 3743 if (Src2Reg == DstReg) 3744 return 
true; 3745 3746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3747 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3748 return true; 3749 3750 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3751 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3752 "source 2 operand must not partially overlap with dst"); 3753 return false; 3754 } 3755 3756 return true; 3757 } 3758 3759 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3760 switch (Inst.getOpcode()) { 3761 default: 3762 return true; 3763 case V_DIV_SCALE_F32_gfx6_gfx7: 3764 case V_DIV_SCALE_F32_vi: 3765 case V_DIV_SCALE_F32_gfx10: 3766 case V_DIV_SCALE_F64_gfx6_gfx7: 3767 case V_DIV_SCALE_F64_vi: 3768 case V_DIV_SCALE_F64_gfx10: 3769 break; 3770 } 3771 3772 // TODO: Check that src0 = src1 or src2. 3773 3774 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3775 AMDGPU::OpName::src1_modifiers, 3776 AMDGPU::OpName::src2_modifiers}) { 3777 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3778 .getImm() & 3779 SISrcMods::ABS) { 3780 return false; 3781 } 3782 } 3783 3784 return true; 3785 } 3786 3787 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3788 3789 const unsigned Opc = Inst.getOpcode(); 3790 const MCInstrDesc &Desc = MII.get(Opc); 3791 3792 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3793 return true; 3794 3795 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3796 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3797 if (isCI() || isSI()) 3798 return false; 3799 } 3800 3801 return true; 3802 } 3803 3804 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3805 const unsigned Opc = Inst.getOpcode(); 3806 const MCInstrDesc &Desc = MII.get(Opc); 3807 3808 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3809 return true; 3810 3811 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3812 if (DimIdx < 0) 3813 return true; 3814 3815 long Imm = Inst.getOperand(DimIdx).getImm(); 3816 if (Imm < 0 || Imm >= 8) 3817 return false; 3818 3819 return true; 3820 } 3821 3822 static bool IsRevOpcode(const unsigned Opcode) 3823 { 3824 switch (Opcode) { 3825 case AMDGPU::V_SUBREV_F32_e32: 3826 case AMDGPU::V_SUBREV_F32_e64: 3827 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3828 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3829 case AMDGPU::V_SUBREV_F32_e32_vi: 3830 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3831 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3832 case AMDGPU::V_SUBREV_F32_e64_vi: 3833 3834 case AMDGPU::V_SUBREV_CO_U32_e32: 3835 case AMDGPU::V_SUBREV_CO_U32_e64: 3836 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3837 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3838 3839 case AMDGPU::V_SUBBREV_U32_e32: 3840 case AMDGPU::V_SUBBREV_U32_e64: 3841 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3842 case AMDGPU::V_SUBBREV_U32_e32_vi: 3843 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3844 case AMDGPU::V_SUBBREV_U32_e64_vi: 3845 3846 case AMDGPU::V_SUBREV_U32_e32: 3847 case AMDGPU::V_SUBREV_U32_e64: 3848 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3849 case AMDGPU::V_SUBREV_U32_e32_vi: 3850 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3851 case AMDGPU::V_SUBREV_U32_e64_vi: 3852 3853 case AMDGPU::V_SUBREV_F16_e32: 3854 case AMDGPU::V_SUBREV_F16_e64: 3855 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3856 case AMDGPU::V_SUBREV_F16_e32_vi: 3857 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3858 case AMDGPU::V_SUBREV_F16_e64_vi: 3859 3860 case AMDGPU::V_SUBREV_U16_e32: 3861 case AMDGPU::V_SUBREV_U16_e64: 3862 case AMDGPU::V_SUBREV_U16_e32_vi: 3863 case AMDGPU::V_SUBREV_U16_e64_vi: 3864 3865 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3866 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3867 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3868 3869 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3870 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3871 3872 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3873 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3874 3875 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3876 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3877 3878 case AMDGPU::V_LSHRREV_B32_e32: 3879 case AMDGPU::V_LSHRREV_B32_e64: 3880 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3881 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3882 case AMDGPU::V_LSHRREV_B32_e32_vi: 3883 case AMDGPU::V_LSHRREV_B32_e64_vi: 3884 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3885 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3886 3887 case AMDGPU::V_ASHRREV_I32_e32: 3888 case AMDGPU::V_ASHRREV_I32_e64: 3889 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3890 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3891 case AMDGPU::V_ASHRREV_I32_e32_vi: 3892 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3893 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3894 case AMDGPU::V_ASHRREV_I32_e64_vi: 3895 3896 case AMDGPU::V_LSHLREV_B32_e32: 3897 case AMDGPU::V_LSHLREV_B32_e64: 3898 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3899 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3900 case AMDGPU::V_LSHLREV_B32_e32_vi: 3901 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3902 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3903 case AMDGPU::V_LSHLREV_B32_e64_vi: 3904 3905 case AMDGPU::V_LSHLREV_B16_e32: 3906 case AMDGPU::V_LSHLREV_B16_e64: 3907 case AMDGPU::V_LSHLREV_B16_e32_vi: 3908 case AMDGPU::V_LSHLREV_B16_e64_vi: 3909 case AMDGPU::V_LSHLREV_B16_gfx10: 3910 3911 case AMDGPU::V_LSHRREV_B16_e32: 3912 case AMDGPU::V_LSHRREV_B16_e64: 3913 case AMDGPU::V_LSHRREV_B16_e32_vi: 3914 case AMDGPU::V_LSHRREV_B16_e64_vi: 3915 case AMDGPU::V_LSHRREV_B16_gfx10: 3916 3917 case AMDGPU::V_ASHRREV_I16_e32: 3918 case AMDGPU::V_ASHRREV_I16_e64: 3919 case AMDGPU::V_ASHRREV_I16_e32_vi: 3920 case AMDGPU::V_ASHRREV_I16_e64_vi: 3921 case AMDGPU::V_ASHRREV_I16_gfx10: 3922 3923 case AMDGPU::V_LSHLREV_B64_e64: 3924 case AMDGPU::V_LSHLREV_B64_gfx10: 3925 case AMDGPU::V_LSHLREV_B64_vi: 3926 3927 case AMDGPU::V_LSHRREV_B64_e64: 3928 case AMDGPU::V_LSHRREV_B64_gfx10: 3929 case AMDGPU::V_LSHRREV_B64_vi: 3930 3931 case AMDGPU::V_ASHRREV_I64_e64: 3932 case AMDGPU::V_ASHRREV_I64_gfx10: 3933 case AMDGPU::V_ASHRREV_I64_vi: 3934 3935 case AMDGPU::V_PK_LSHLREV_B16: 3936 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3937 case AMDGPU::V_PK_LSHLREV_B16_vi: 3938 3939 case AMDGPU::V_PK_LSHRREV_B16: 3940 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3941 case AMDGPU::V_PK_LSHRREV_B16_vi: 3942 case AMDGPU::V_PK_ASHRREV_I16: 3943 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3944 case AMDGPU::V_PK_ASHRREV_I16_vi: 3945 return true; 3946 default: 3947 return false; 3948 } 3949 } 3950 3951 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3952 3953 using namespace SIInstrFlags; 3954 const unsigned Opcode = Inst.getOpcode(); 3955 const MCInstrDesc &Desc = MII.get(Opcode); 3956 3957 // lds_direct register is defined so that it can be used 3958 // with 9-bit operands only. Ignore encodings which do not accept these. 
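  // For example (a sketch): "v_mov_b32 v0, lds_direct" uses lds_direct as src0
  // of a VOP1 encoding and passes this check, while using lds_direct as
  // src1/src2, with an SDWA encoding, or with a *rev opcode is diagnosed by
  // the loop below.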
3959 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3960 if ((Desc.TSFlags & Enc) == 0) 3961 return None; 3962 3963 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3964 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3965 if (SrcIdx == -1) 3966 break; 3967 const auto &Src = Inst.getOperand(SrcIdx); 3968 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3969 3970 if (isGFX90A()) 3971 return StringRef("lds_direct is not supported on this GPU"); 3972 3973 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3974 return StringRef("lds_direct cannot be used with this instruction"); 3975 3976 if (SrcName != OpName::src0) 3977 return StringRef("lds_direct may be used as src0 only"); 3978 } 3979 } 3980 3981 return None; 3982 } 3983 3984 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3985 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3986 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3987 if (Op.isFlatOffset()) 3988 return Op.getStartLoc(); 3989 } 3990 return getLoc(); 3991 } 3992 3993 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3994 const OperandVector &Operands) { 3995 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3996 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3997 return true; 3998 3999 auto Opcode = Inst.getOpcode(); 4000 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4001 assert(OpNum != -1); 4002 4003 const auto &Op = Inst.getOperand(OpNum); 4004 if (!hasFlatOffsets() && Op.getImm() != 0) { 4005 Error(getFlatOffsetLoc(Operands), 4006 "flat offset modifier is not supported on this GPU"); 4007 return false; 4008 } 4009 4010 // For FLAT segment the offset must be positive; 4011 // MSB is ignored and forced to zero. 4012 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4013 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4014 if (!isIntN(OffsetSize, Op.getImm())) { 4015 Error(getFlatOffsetLoc(Operands), 4016 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4017 return false; 4018 } 4019 } else { 4020 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4021 if (!isUIntN(OffsetSize, Op.getImm())) { 4022 Error(getFlatOffsetLoc(Operands), 4023 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4024 return false; 4025 } 4026 } 4027 4028 return true; 4029 } 4030 4031 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4032 // Start with second operand because SMEM Offset cannot be dst or src0. 
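  // E.g. for "s_load_dword s4, s[2:3], 0x10" the parsed operand list is
  // roughly [mnemonic, dst, base, offset] (a sketch of the usual SMEM operand
  // order; index 0 is the mnemonic token), so scanning from index 2 onward is
  // sufficient to find the offset.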
4033 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4034 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4035 if (Op.isSMEMOffset()) 4036 return Op.getStartLoc(); 4037 } 4038 return getLoc(); 4039 } 4040 4041 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4042 const OperandVector &Operands) { 4043 if (isCI() || isSI()) 4044 return true; 4045 4046 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4047 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4048 return true; 4049 4050 auto Opcode = Inst.getOpcode(); 4051 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4052 if (OpNum == -1) 4053 return true; 4054 4055 const auto &Op = Inst.getOperand(OpNum); 4056 if (!Op.isImm()) 4057 return true; 4058 4059 uint64_t Offset = Op.getImm(); 4060 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4061 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4062 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4063 return true; 4064 4065 Error(getSMEMOffsetLoc(Operands), 4066 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4067 "expected a 21-bit signed offset"); 4068 4069 return false; 4070 } 4071 4072 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4073 unsigned Opcode = Inst.getOpcode(); 4074 const MCInstrDesc &Desc = MII.get(Opcode); 4075 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4076 return true; 4077 4078 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4079 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4080 4081 const int OpIndices[] = { Src0Idx, Src1Idx }; 4082 4083 unsigned NumExprs = 0; 4084 unsigned NumLiterals = 0; 4085 uint32_t LiteralValue; 4086 4087 for (int OpIdx : OpIndices) { 4088 if (OpIdx == -1) break; 4089 4090 const MCOperand &MO = Inst.getOperand(OpIdx); 4091 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4092 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4093 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4094 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4095 if (NumLiterals == 0 || LiteralValue != Value) { 4096 LiteralValue = Value; 4097 ++NumLiterals; 4098 } 4099 } else if (MO.isExpr()) { 4100 ++NumExprs; 4101 } 4102 } 4103 } 4104 4105 return NumLiterals + NumExprs <= 1; 4106 } 4107 4108 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4109 const unsigned Opc = Inst.getOpcode(); 4110 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4111 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4112 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4113 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4114 4115 if (OpSel & ~3) 4116 return false; 4117 } 4118 4119 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4120 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4121 if (OpSelIdx != -1) { 4122 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4123 return false; 4124 } 4125 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4126 if (OpSelHiIdx != -1) { 4127 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4128 return false; 4129 } 4130 } 4131 4132 return true; 4133 } 4134 4135 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4136 const OperandVector &Operands) { 4137 const unsigned Opc = Inst.getOpcode(); 4138 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4139 if (DppCtrlIdx < 0) 4140 return true; 4141 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4142 4143 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4144 // DPP64 is supported for row_newbcast only. 4145 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4146 if (Src0Idx >= 0 && 4147 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4148 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4149 Error(S, "64 bit dpp only supports row_newbcast"); 4150 return false; 4151 } 4152 } 4153 4154 return true; 4155 } 4156 4157 // Check if VCC register matches wavefront size 4158 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4159 auto FB = getFeatureBits(); 4160 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4161 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4162 } 4163 4164 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4165 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4166 const OperandVector &Operands) { 4167 unsigned Opcode = Inst.getOpcode(); 4168 const MCInstrDesc &Desc = MII.get(Opcode); 4169 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4170 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4171 ImmIdx == -1) 4172 return true; 4173 4174 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4175 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4176 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4177 4178 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4179 4180 unsigned NumExprs = 0; 4181 unsigned NumLiterals = 0; 4182 uint32_t LiteralValue; 4183 4184 for (int OpIdx : OpIndices) { 4185 if (OpIdx == -1) 4186 continue; 4187 4188 const MCOperand &MO = Inst.getOperand(OpIdx); 4189 if (!MO.isImm() && !MO.isExpr()) 4190 continue; 4191 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4192 continue; 4193 4194 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4195 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4196 Error(getConstLoc(Operands), 4197 "inline constants are not allowed for this operand"); 4198 return false; 4199 } 4200 4201 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4202 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4203 if (NumLiterals == 0 || LiteralValue != Value) { 4204 LiteralValue = Value; 4205 ++NumLiterals; 4206 } 4207 } else if (MO.isExpr()) { 4208 ++NumExprs; 4209 } 4210 } 4211 NumLiterals += NumExprs; 4212 4213 if (!NumLiterals) 4214 return true; 4215 4216 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4217 Error(getLitLoc(Operands), "literal operands are not supported"); 4218 return false; 4219 } 4220 4221 if (NumLiterals > 1) { 4222 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4223 return false; 4224 } 4225 4226 return true; 4227 } 4228 4229 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4230 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4231 const MCRegisterInfo *MRI) { 4232 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4233 if (OpIdx < 0) 4234 return -1; 4235 4236 const MCOperand &Op = Inst.getOperand(OpIdx); 4237 if (!Op.isReg()) 4238 return -1; 4239 4240 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4241 auto Reg = Sub ? Sub : Op.getReg(); 4242 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4243 return AGPR32.contains(Reg) ? 
1 : 0; 4244 } 4245 4246 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4247 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4248 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4249 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4250 SIInstrFlags::DS)) == 0) 4251 return true; 4252 4253 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4254 : AMDGPU::OpName::vdata; 4255 4256 const MCRegisterInfo *MRI = getMRI(); 4257 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4258 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4259 4260 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4261 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4262 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4263 return false; 4264 } 4265 4266 auto FB = getFeatureBits(); 4267 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4268 if (DataAreg < 0 || DstAreg < 0) 4269 return true; 4270 return DstAreg == DataAreg; 4271 } 4272 4273 return DstAreg < 1 && DataAreg < 1; 4274 } 4275 4276 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4277 auto FB = getFeatureBits(); 4278 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4279 return true; 4280 4281 const MCRegisterInfo *MRI = getMRI(); 4282 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4283 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4284 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4285 const MCOperand &Op = Inst.getOperand(I); 4286 if (!Op.isReg()) 4287 continue; 4288 4289 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4290 if (!Sub) 4291 continue; 4292 4293 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4294 return false; 4295 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4296 return false; 4297 } 4298 4299 return true; 4300 } 4301 4302 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4303 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4304 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4305 if (Op.isBLGP()) 4306 return Op.getStartLoc(); 4307 } 4308 return SMLoc(); 4309 } 4310 4311 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4312 const OperandVector &Operands) { 4313 unsigned Opc = Inst.getOpcode(); 4314 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4315 if (BlgpIdx == -1) 4316 return true; 4317 SMLoc BLGPLoc = getBLGPLoc(Operands); 4318 if (!BLGPLoc.isValid()) 4319 return true; 4320 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4321 auto FB = getFeatureBits(); 4322 bool UsesNeg = false; 4323 if (FB[AMDGPU::FeatureGFX940Insts]) { 4324 switch (Opc) { 4325 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4326 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4327 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4328 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4329 UsesNeg = true; 4330 } 4331 } 4332 4333 if (IsNeg == UsesNeg) 4334 return true; 4335 4336 Error(BLGPLoc, 4337 UsesNeg ? "invalid modifier: blgp is not supported" 4338 : "invalid modifier: neg is not supported"); 4339 4340 return false; 4341 } 4342 4343 // gfx90a has an undocumented limitation: 4344 // DS_GWS opcodes must use even aligned registers. 
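// For example (a sketch assuming gfx90a syntax), "ds_gws_init v1 gds" would be
// rejected below with "vgpr must be even aligned", whereas "ds_gws_init v2 gds"
// names an even-numbered data register and is accepted.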
4345 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4346 const OperandVector &Operands) { 4347 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4348 return true; 4349 4350 int Opc = Inst.getOpcode(); 4351 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4352 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4353 return true; 4354 4355 const MCRegisterInfo *MRI = getMRI(); 4356 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4357 int Data0Pos = 4358 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4359 assert(Data0Pos != -1); 4360 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4361 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4362 if (RegIdx & 1) { 4363 SMLoc RegLoc = getRegLoc(Reg, Operands); 4364 Error(RegLoc, "vgpr must be even aligned"); 4365 return false; 4366 } 4367 4368 return true; 4369 } 4370 4371 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4372 const OperandVector &Operands, 4373 const SMLoc &IDLoc) { 4374 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4375 AMDGPU::OpName::cpol); 4376 if (CPolPos == -1) 4377 return true; 4378 4379 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4380 4381 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4382 if ((TSFlags & (SIInstrFlags::SMRD)) && 4383 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4384 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4385 return false; 4386 } 4387 4388 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4389 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4390 StringRef CStr(S.getPointer()); 4391 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4392 Error(S, "scc is not supported on this GPU"); 4393 return false; 4394 } 4395 4396 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4397 return true; 4398 4399 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4400 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4401 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4402 : "instruction must use glc"); 4403 return false; 4404 } 4405 } else { 4406 if (CPol & CPol::GLC) { 4407 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4408 StringRef CStr(S.getPointer()); 4409 S = SMLoc::getFromPointer( 4410 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4411 Error(S, isGFX940() ? 
"instruction must not use sc0" 4412 : "instruction must not use glc"); 4413 return false; 4414 } 4415 } 4416 4417 return true; 4418 } 4419 4420 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4421 const SMLoc &IDLoc, 4422 const OperandVector &Operands) { 4423 if (auto ErrMsg = validateLdsDirect(Inst)) { 4424 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4425 return false; 4426 } 4427 if (!validateSOPLiteral(Inst)) { 4428 Error(getLitLoc(Operands), 4429 "only one literal operand is allowed"); 4430 return false; 4431 } 4432 if (!validateVOPLiteral(Inst, Operands)) { 4433 return false; 4434 } 4435 if (!validateConstantBusLimitations(Inst, Operands)) { 4436 return false; 4437 } 4438 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4439 return false; 4440 } 4441 if (!validateIntClampSupported(Inst)) { 4442 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4443 "integer clamping is not supported on this GPU"); 4444 return false; 4445 } 4446 if (!validateOpSel(Inst)) { 4447 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4448 "invalid op_sel operand"); 4449 return false; 4450 } 4451 if (!validateDPP(Inst, Operands)) { 4452 return false; 4453 } 4454 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4455 if (!validateMIMGD16(Inst)) { 4456 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4457 "d16 modifier is not supported on this GPU"); 4458 return false; 4459 } 4460 if (!validateMIMGDim(Inst)) { 4461 Error(IDLoc, "dim modifier is required on this GPU"); 4462 return false; 4463 } 4464 if (!validateMIMGMSAA(Inst)) { 4465 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4466 "invalid dim; must be MSAA type"); 4467 return false; 4468 } 4469 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4470 Error(IDLoc, *ErrMsg); 4471 return false; 4472 } 4473 if (!validateMIMGAddrSize(Inst)) { 4474 Error(IDLoc, 4475 "image address size does not match dim and a16"); 4476 return false; 4477 } 4478 if (!validateMIMGAtomicDMask(Inst)) { 4479 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4480 "invalid atomic image dmask"); 4481 return false; 4482 } 4483 if (!validateMIMGGatherDMask(Inst)) { 4484 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4485 "invalid image_gather dmask: only one bit must be set"); 4486 return false; 4487 } 4488 if (!validateMovrels(Inst, Operands)) { 4489 return false; 4490 } 4491 if (!validateFlatOffset(Inst, Operands)) { 4492 return false; 4493 } 4494 if (!validateSMEMOffset(Inst, Operands)) { 4495 return false; 4496 } 4497 if (!validateMAIAccWrite(Inst, Operands)) { 4498 return false; 4499 } 4500 if (!validateMFMA(Inst, Operands)) { 4501 return false; 4502 } 4503 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4504 return false; 4505 } 4506 4507 if (!validateAGPRLdSt(Inst)) { 4508 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4509 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4510 : "invalid register class: agpr loads and stores not supported on this GPU" 4511 ); 4512 return false; 4513 } 4514 if (!validateVGPRAlign(Inst)) { 4515 Error(IDLoc, 4516 "invalid register class: vgpr tuples must be 64 bit aligned"); 4517 return false; 4518 } 4519 if (!validateGWS(Inst, Operands)) { 4520 return false; 4521 } 4522 4523 if (!validateBLGP(Inst, Operands)) { 4524 return false; 4525 } 4526 4527 if (!validateDivScale(Inst)) { 4528 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4529 return false; 4530 } 4531 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4532 return false; 4533 } 4534 4535 return true; 4536 } 4537 4538 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4539 const FeatureBitset &FBS, 4540 unsigned VariantID = 0); 4541 4542 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4543 const FeatureBitset &AvailableFeatures, 4544 unsigned VariantID); 4545 4546 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4547 const FeatureBitset &FBS) { 4548 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4549 } 4550 4551 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4552 const FeatureBitset &FBS, 4553 ArrayRef<unsigned> Variants) { 4554 for (auto Variant : Variants) { 4555 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4556 return true; 4557 } 4558 4559 return false; 4560 } 4561 4562 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4563 const SMLoc &IDLoc) { 4564 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4565 4566 // Check if requested instruction variant is supported. 4567 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4568 return false; 4569 4570 // This instruction is not supported. 4571 // Clear any other pending errors because they are no longer relevant. 4572 getParser().clearPendingErrors(); 4573 4574 // Requested instruction variant is not supported. 4575 // Check if any other variants are supported. 4576 StringRef VariantName = getMatchedVariantName(); 4577 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4578 return Error(IDLoc, 4579 Twine(VariantName, 4580 " variant of this instruction is not supported")); 4581 } 4582 4583 // Finally check if this instruction is supported on any other GPU. 4584 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4585 return Error(IDLoc, "instruction not supported on this GPU"); 4586 } 4587 4588 // Instruction not supported on any GPU. Probably a typo. 4589 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4590 return Error(IDLoc, "invalid instruction" + Suggestion); 4591 } 4592 4593 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4594 OperandVector &Operands, 4595 MCStreamer &Out, 4596 uint64_t &ErrorInfo, 4597 bool MatchingInlineAsm) { 4598 MCInst Inst; 4599 unsigned Result = Match_Success; 4600 for (auto Variant : getMatchedVariants()) { 4601 uint64_t EI; 4602 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4603 Variant); 4604 // We order match statuses from least to most specific. 
We use most specific 4605 // status as resulting 4606 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4607 if ((R == Match_Success) || 4608 (R == Match_PreferE32) || 4609 (R == Match_MissingFeature && Result != Match_PreferE32) || 4610 (R == Match_InvalidOperand && Result != Match_MissingFeature 4611 && Result != Match_PreferE32) || 4612 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4613 && Result != Match_MissingFeature 4614 && Result != Match_PreferE32)) { 4615 Result = R; 4616 ErrorInfo = EI; 4617 } 4618 if (R == Match_Success) 4619 break; 4620 } 4621 4622 if (Result == Match_Success) { 4623 if (!validateInstruction(Inst, IDLoc, Operands)) { 4624 return true; 4625 } 4626 Inst.setLoc(IDLoc); 4627 Out.emitInstruction(Inst, getSTI()); 4628 return false; 4629 } 4630 4631 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4632 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4633 return true; 4634 } 4635 4636 switch (Result) { 4637 default: break; 4638 case Match_MissingFeature: 4639 // It has been verified that the specified instruction 4640 // mnemonic is valid. A match was found but it requires 4641 // features which are not supported on this GPU. 4642 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4643 4644 case Match_InvalidOperand: { 4645 SMLoc ErrorLoc = IDLoc; 4646 if (ErrorInfo != ~0ULL) { 4647 if (ErrorInfo >= Operands.size()) { 4648 return Error(IDLoc, "too few operands for instruction"); 4649 } 4650 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4651 if (ErrorLoc == SMLoc()) 4652 ErrorLoc = IDLoc; 4653 } 4654 return Error(ErrorLoc, "invalid operand for instruction"); 4655 } 4656 4657 case Match_PreferE32: 4658 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4659 "should be encoded as e32"); 4660 case Match_MnemonicFail: 4661 llvm_unreachable("Invalid instructions should have been handled already"); 4662 } 4663 llvm_unreachable("Implement any new match types added!"); 4664 } 4665 4666 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4667 int64_t Tmp = -1; 4668 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4669 return true; 4670 } 4671 if (getParser().parseAbsoluteExpression(Tmp)) { 4672 return true; 4673 } 4674 Ret = static_cast<uint32_t>(Tmp); 4675 return false; 4676 } 4677 4678 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4679 uint32_t &Minor) { 4680 if (ParseAsAbsoluteExpression(Major)) 4681 return TokError("invalid major version"); 4682 4683 if (!trySkipToken(AsmToken::Comma)) 4684 return TokError("minor version number required, comma expected"); 4685 4686 if (ParseAsAbsoluteExpression(Minor)) 4687 return TokError("invalid minor version"); 4688 4689 return false; 4690 } 4691 4692 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4693 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4694 return TokError("directive only supported for amdgcn architecture"); 4695 4696 std::string TargetIDDirective; 4697 SMLoc TargetStart = getTok().getLoc(); 4698 if (getParser().parseEscapedString(TargetIDDirective)) 4699 return true; 4700 4701 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4702 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4703 return getParser().Error(TargetRange.Start, 4704 (Twine(".amdgcn_target directive's target id ") + 4705 Twine(TargetIDDirective) + 4706 Twine(" does not match the specified target id ") + 4707 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4708 4709 return false; 4710 } 4711 4712 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4713 return Error(Range.Start, "value out of range", Range); 4714 } 4715 4716 bool AMDGPUAsmParser::calculateGPRBlocks( 4717 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4718 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4719 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4720 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4721 // TODO(scott.linder): These calculations are duplicated from 4722 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4723 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4724 4725 unsigned NumVGPRs = NextFreeVGPR; 4726 unsigned NumSGPRs = NextFreeSGPR; 4727 4728 if (Version.Major >= 10) 4729 NumSGPRs = 0; 4730 else { 4731 unsigned MaxAddressableNumSGPRs = 4732 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4733 4734 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4735 NumSGPRs > MaxAddressableNumSGPRs) 4736 return OutOfRangeError(SGPRRange); 4737 4738 NumSGPRs += 4739 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4740 4741 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4742 NumSGPRs > MaxAddressableNumSGPRs) 4743 return OutOfRangeError(SGPRRange); 4744 4745 if (Features.test(FeatureSGPRInitBug)) 4746 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4747 } 4748 4749 VGPRBlocks = 4750 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4751 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4752 4753 return false; 4754 } 4755 4756 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4757 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4758 return TokError("directive only supported for amdgcn architecture"); 4759 4760 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4761 return TokError("directive only supported for amdhsa OS"); 4762 4763 StringRef KernelName; 4764 if (getParser().parseIdentifier(KernelName)) 4765 return true; 4766 4767 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4768 4769 StringSet<> Seen; 4770 4771 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4772 4773 SMRange VGPRRange; 4774 uint64_t NextFreeVGPR = 0; 4775 uint64_t AccumOffset = 0; 4776 uint64_t SharedVGPRCount = 0; 4777 SMRange SGPRRange; 4778 uint64_t NextFreeSGPR = 0; 4779 4780 // Count the number of user SGPRs implied from the enabled feature bits. 4781 unsigned ImpliedUserSGPRCount = 0; 4782 4783 // Track if the asm explicitly contains the directive for the user SGPR 4784 // count. 
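  // For example (a sketch): if the input enables
  // .amdhsa_user_sgpr_dispatch_ptr and .amdhsa_user_sgpr_kernarg_segment_ptr
  // (two SGPRs each), the implied count is 4, and an explicit
  // ".amdhsa_user_sgpr_count 3" is later rejected for being smaller than the
  // implied value.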
4785 Optional<unsigned> ExplicitUserSGPRCount; 4786 bool ReserveVCC = true; 4787 bool ReserveFlatScr = true; 4788 Optional<bool> EnableWavefrontSize32; 4789 4790 while (true) { 4791 while (trySkipToken(AsmToken::EndOfStatement)); 4792 4793 StringRef ID; 4794 SMRange IDRange = getTok().getLocRange(); 4795 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4796 return true; 4797 4798 if (ID == ".end_amdhsa_kernel") 4799 break; 4800 4801 if (Seen.find(ID) != Seen.end()) 4802 return TokError(".amdhsa_ directives cannot be repeated"); 4803 Seen.insert(ID); 4804 4805 SMLoc ValStart = getLoc(); 4806 int64_t IVal; 4807 if (getParser().parseAbsoluteExpression(IVal)) 4808 return true; 4809 SMLoc ValEnd = getLoc(); 4810 SMRange ValRange = SMRange(ValStart, ValEnd); 4811 4812 if (IVal < 0) 4813 return OutOfRangeError(ValRange); 4814 4815 uint64_t Val = IVal; 4816 4817 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4818 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4819 return OutOfRangeError(RANGE); \ 4820 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4821 4822 if (ID == ".amdhsa_group_segment_fixed_size") { 4823 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4824 return OutOfRangeError(ValRange); 4825 KD.group_segment_fixed_size = Val; 4826 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4827 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4828 return OutOfRangeError(ValRange); 4829 KD.private_segment_fixed_size = Val; 4830 } else if (ID == ".amdhsa_kernarg_size") { 4831 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4832 return OutOfRangeError(ValRange); 4833 KD.kernarg_size = Val; 4834 } else if (ID == ".amdhsa_user_sgpr_count") { 4835 ExplicitUserSGPRCount = Val; 4836 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4837 if (hasArchitectedFlatScratch()) 4838 return Error(IDRange.Start, 4839 "directive is not supported with architected flat scratch", 4840 IDRange); 4841 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4842 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4843 Val, ValRange); 4844 if (Val) 4845 ImpliedUserSGPRCount += 4; 4846 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4847 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4848 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4849 ValRange); 4850 if (Val) 4851 ImpliedUserSGPRCount += 2; 4852 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4853 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4854 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4855 ValRange); 4856 if (Val) 4857 ImpliedUserSGPRCount += 2; 4858 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4859 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4860 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4861 Val, ValRange); 4862 if (Val) 4863 ImpliedUserSGPRCount += 2; 4864 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4865 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4866 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4867 ValRange); 4868 if (Val) 4869 ImpliedUserSGPRCount += 2; 4870 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4871 if (hasArchitectedFlatScratch()) 4872 return Error(IDRange.Start, 4873 "directive is not supported with architected flat scratch", 4874 IDRange); 4875 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4876 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4877 ValRange); 4878 if (Val) 4879 ImpliedUserSGPRCount += 2; 4880 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4881 
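      // A sketch: ".amdhsa_user_sgpr_private_segment_size 1" sets the
      // corresponding kernel-code property and, as with the other user SGPR
      // directives, contributes (one SGPR) to the implied user SGPR count.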
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4882 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4883 Val, ValRange); 4884 if (Val) 4885 ImpliedUserSGPRCount += 1; 4886 } else if (ID == ".amdhsa_wavefront_size32") { 4887 if (IVersion.Major < 10) 4888 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4889 EnableWavefrontSize32 = Val; 4890 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4891 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4892 Val, ValRange); 4893 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4894 if (hasArchitectedFlatScratch()) 4895 return Error(IDRange.Start, 4896 "directive is not supported with architected flat scratch", 4897 IDRange); 4898 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4899 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4900 } else if (ID == ".amdhsa_enable_private_segment") { 4901 if (!hasArchitectedFlatScratch()) 4902 return Error( 4903 IDRange.Start, 4904 "directive is not supported without architected flat scratch", 4905 IDRange); 4906 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4907 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4908 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4909 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4910 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4911 ValRange); 4912 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4913 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4914 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4915 ValRange); 4916 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4917 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4918 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4919 ValRange); 4920 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4921 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4922 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4923 ValRange); 4924 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4925 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4926 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4927 ValRange); 4928 } else if (ID == ".amdhsa_next_free_vgpr") { 4929 VGPRRange = ValRange; 4930 NextFreeVGPR = Val; 4931 } else if (ID == ".amdhsa_next_free_sgpr") { 4932 SGPRRange = ValRange; 4933 NextFreeSGPR = Val; 4934 } else if (ID == ".amdhsa_accum_offset") { 4935 if (!isGFX90A()) 4936 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4937 AccumOffset = Val; 4938 } else if (ID == ".amdhsa_reserve_vcc") { 4939 if (!isUInt<1>(Val)) 4940 return OutOfRangeError(ValRange); 4941 ReserveVCC = Val; 4942 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4943 if (IVersion.Major < 7) 4944 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4945 if (hasArchitectedFlatScratch()) 4946 return Error(IDRange.Start, 4947 "directive is not supported with architected flat scratch", 4948 IDRange); 4949 if (!isUInt<1>(Val)) 4950 return OutOfRangeError(ValRange); 4951 ReserveFlatScr = Val; 4952 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4953 if (IVersion.Major < 8) 4954 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4955 if (!isUInt<1>(Val)) 4956 return OutOfRangeError(ValRange); 4957 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4958 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4959 IDRange); 4960 } else if (ID == ".amdhsa_float_round_mode_32") { 4961 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4962 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4963 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4964 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4965 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4966 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4967 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4968 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4969 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4970 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4971 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4972 ValRange); 4973 } else if (ID == ".amdhsa_dx10_clamp") { 4974 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4975 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4976 } else if (ID == ".amdhsa_ieee_mode") { 4977 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4978 Val, ValRange); 4979 } else if (ID == ".amdhsa_fp16_overflow") { 4980 if (IVersion.Major < 9) 4981 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4982 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4983 ValRange); 4984 } else if (ID == ".amdhsa_tg_split") { 4985 if (!isGFX90A()) 4986 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4987 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4988 ValRange); 4989 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4990 if (IVersion.Major < 10) 4991 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4992 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4993 ValRange); 4994 } else if (ID == ".amdhsa_memory_ordered") { 4995 if (IVersion.Major < 10) 4996 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4997 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4998 ValRange); 4999 } else if (ID == ".amdhsa_forward_progress") { 5000 if (IVersion.Major < 10) 5001 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5002 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5003 ValRange); 5004 } else if (ID == ".amdhsa_shared_vgpr_count") { 5005 if (IVersion.Major < 10) 5006 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5007 SharedVGPRCount = Val; 5008 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5009 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5010 ValRange); 5011 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5012 PARSE_BITS_ENTRY( 5013 KD.compute_pgm_rsrc2, 5014 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5015 ValRange); 5016 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5018 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5019 Val, ValRange); 5020 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5021 PARSE_BITS_ENTRY( 5022 KD.compute_pgm_rsrc2, 5023 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5024 ValRange); 5025 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5027 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5028 Val, ValRange); 5029 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5030 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5031 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5032 Val, ValRange); 5033 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5035 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5036 Val, ValRange); 5037 } else if (ID == ".amdhsa_exception_int_div_zero") { 5038 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5039 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5040 Val, ValRange); 5041 } else { 5042 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5043 } 5044 5045 #undef PARSE_BITS_ENTRY 5046 } 5047 5048 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5049 return TokError(".amdhsa_next_free_vgpr directive is required"); 5050 5051 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5052 return TokError(".amdhsa_next_free_sgpr directive is required"); 5053 5054 unsigned VGPRBlocks; 5055 unsigned SGPRBlocks; 5056 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5057 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5058 EnableWavefrontSize32, NextFreeVGPR, 5059 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5060 SGPRBlocks)) 5061 return true; 5062 5063 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5064 VGPRBlocks)) 5065 return OutOfRangeError(VGPRRange); 5066 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5067 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5068 5069 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5070 SGPRBlocks)) 5071 return OutOfRangeError(SGPRRange); 5072 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5073 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5074 SGPRBlocks); 5075 5076 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5077 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5078 "enabled user SGPRs"); 5079 5080 unsigned UserSGPRCount = 5081 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5082 5083 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5084 return TokError("too many user SGPRs enabled"); 5085 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5086 UserSGPRCount); 5087 5088 if (isGFX90A()) { 5089 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5090 return TokError(".amdhsa_accum_offset directive is required"); 5091 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5092 return TokError("accum_offset should be in range [4..256] in " 5093 "increments of 4"); 5094 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5095 return TokError("accum_offset exceeds total VGPR allocation"); 5096 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5097 (AccumOffset / 4 - 1)); 5098 } 5099 5100 if (IVersion.Major == 10) { 5101 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5102 if (SharedVGPRCount && EnableWavefrontSize32) { 5103 return TokError("shared_vgpr_count directive not valid on " 5104 "wavefront size 32"); 5105 } 5106 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5107 return TokError("shared_vgpr_count*2 + " 5108 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5109 "exceed 63\n"); 5110 } 5111 } 5112 5113 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5114 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5115 ReserveFlatScr); 5116 return false; 5117 } 5118 5119 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5120 uint32_t Major; 5121 uint32_t Minor; 5122 5123 if (ParseDirectiveMajorMinor(Major, Minor)) 5124 return true; 5125 5126 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5127 return false; 5128 } 5129 5130 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5131 uint32_t Major; 5132 uint32_t Minor; 5133 uint32_t Stepping; 5134 StringRef VendorName; 5135 StringRef ArchName; 5136 5137 // If this directive has no 
arguments, then use the ISA version for the 5138 // targeted GPU. 5139 if (isToken(AsmToken::EndOfStatement)) { 5140 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5141 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5142 ISA.Stepping, 5143 "AMD", "AMDGPU"); 5144 return false; 5145 } 5146 5147 if (ParseDirectiveMajorMinor(Major, Minor)) 5148 return true; 5149 5150 if (!trySkipToken(AsmToken::Comma)) 5151 return TokError("stepping version number required, comma expected"); 5152 5153 if (ParseAsAbsoluteExpression(Stepping)) 5154 return TokError("invalid stepping version"); 5155 5156 if (!trySkipToken(AsmToken::Comma)) 5157 return TokError("vendor name required, comma expected"); 5158 5159 if (!parseString(VendorName, "invalid vendor name")) 5160 return true; 5161 5162 if (!trySkipToken(AsmToken::Comma)) 5163 return TokError("arch name required, comma expected"); 5164 5165 if (!parseString(ArchName, "invalid arch name")) 5166 return true; 5167 5168 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5169 VendorName, ArchName); 5170 return false; 5171 } 5172 5173 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5174 amd_kernel_code_t &Header) { 5175 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5176 // assembly for backwards compatibility. 5177 if (ID == "max_scratch_backing_memory_byte_size") { 5178 Parser.eatToEndOfStatement(); 5179 return false; 5180 } 5181 5182 SmallString<40> ErrStr; 5183 raw_svector_ostream Err(ErrStr); 5184 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5185 return TokError(Err.str()); 5186 } 5187 Lex(); 5188 5189 if (ID == "enable_wavefront_size32") { 5190 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5191 if (!isGFX10Plus()) 5192 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5193 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5194 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5195 } else { 5196 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5197 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5198 } 5199 } 5200 5201 if (ID == "wavefront_size") { 5202 if (Header.wavefront_size == 5) { 5203 if (!isGFX10Plus()) 5204 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5205 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5206 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5207 } else if (Header.wavefront_size == 6) { 5208 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5209 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5210 } 5211 } 5212 5213 if (ID == "enable_wgp_mode") { 5214 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5215 !isGFX10Plus()) 5216 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5217 } 5218 5219 if (ID == "enable_mem_ordered") { 5220 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5221 !isGFX10Plus()) 5222 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5223 } 5224 5225 if (ID == "enable_fwd_progress") { 5226 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5227 !isGFX10Plus()) 5228 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5229 } 5230 5231 return false; 5232 } 5233 5234 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5235 amd_kernel_code_t Header; 5236 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5237 5238 while (true) { 
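    // The directive body handled here typically looks like (a sketch):
    //     enable_sgpr_kernarg_segment_ptr = 1
    //     wavefront_size = 6
    //   .end_amd_kernel_code_t
    // Each "key = value" line is forwarded to ParseAMDKernelCodeTValue.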
5239 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5240 // will set the current token to EndOfStatement. 5241 while(trySkipToken(AsmToken::EndOfStatement)); 5242 5243 StringRef ID; 5244 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5245 return true; 5246 5247 if (ID == ".end_amd_kernel_code_t") 5248 break; 5249 5250 if (ParseAMDKernelCodeTValue(ID, Header)) 5251 return true; 5252 } 5253 5254 getTargetStreamer().EmitAMDKernelCodeT(Header); 5255 5256 return false; 5257 } 5258 5259 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5260 StringRef KernelName; 5261 if (!parseId(KernelName, "expected symbol name")) 5262 return true; 5263 5264 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5265 ELF::STT_AMDGPU_HSA_KERNEL); 5266 5267 KernelScope.initialize(getContext()); 5268 return false; 5269 } 5270 5271 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5272 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5273 return Error(getLoc(), 5274 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5275 "architectures"); 5276 } 5277 5278 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5279 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5280 return Error(getParser().getTok().getLoc(), "target id must match options"); 5281 5282 getTargetStreamer().EmitISAVersion(); 5283 Lex(); 5284 5285 return false; 5286 } 5287 5288 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5289 const char *AssemblerDirectiveBegin; 5290 const char *AssemblerDirectiveEnd; 5291 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5292 isHsaAbiVersion3AndAbove(&getSTI()) 5293 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5294 HSAMD::V3::AssemblerDirectiveEnd) 5295 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5296 HSAMD::AssemblerDirectiveEnd); 5297 5298 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5299 return Error(getLoc(), 5300 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5301 "not available on non-amdhsa OSes")).str()); 5302 } 5303 5304 std::string HSAMetadataString; 5305 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5306 HSAMetadataString)) 5307 return true; 5308 5309 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5310 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5311 return Error(getLoc(), "invalid HSA metadata"); 5312 } else { 5313 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5314 return Error(getLoc(), "invalid HSA metadata"); 5315 } 5316 5317 return false; 5318 } 5319 5320 /// Common code to parse out a block of text (typically YAML) between start and 5321 /// end directives. 
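/// For example, with the V3 HSA metadata directives this collects everything
/// between ".amdgpu_metadata" and ".end_amdgpu_metadata" into CollectString
/// (illustrative of the begin/end bracketing only).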
5322 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5323 const char *AssemblerDirectiveEnd, 5324 std::string &CollectString) { 5325 5326 raw_string_ostream CollectStream(CollectString); 5327 5328 getLexer().setSkipSpace(false); 5329 5330 bool FoundEnd = false; 5331 while (!isToken(AsmToken::Eof)) { 5332 while (isToken(AsmToken::Space)) { 5333 CollectStream << getTokenStr(); 5334 Lex(); 5335 } 5336 5337 if (trySkipId(AssemblerDirectiveEnd)) { 5338 FoundEnd = true; 5339 break; 5340 } 5341 5342 CollectStream << Parser.parseStringToEndOfStatement() 5343 << getContext().getAsmInfo()->getSeparatorString(); 5344 5345 Parser.eatToEndOfStatement(); 5346 } 5347 5348 getLexer().setSkipSpace(true); 5349 5350 if (isToken(AsmToken::Eof) && !FoundEnd) { 5351 return TokError(Twine("expected directive ") + 5352 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5353 } 5354 5355 CollectStream.flush(); 5356 return false; 5357 } 5358 5359 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5360 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5361 std::string String; 5362 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5363 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5364 return true; 5365 5366 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5367 if (!PALMetadata->setFromString(String)) 5368 return Error(getLoc(), "invalid PAL metadata"); 5369 return false; 5370 } 5371 5372 /// Parse the assembler directive for old linear-format PAL metadata. 5373 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5374 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5375 return Error(getLoc(), 5376 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5377 "not available on non-amdpal OSes")).str()); 5378 } 5379 5380 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5381 PALMetadata->setLegacy(); 5382 for (;;) { 5383 uint32_t Key, Value; 5384 if (ParseAsAbsoluteExpression(Key)) { 5385 return TokError(Twine("invalid value in ") + 5386 Twine(PALMD::AssemblerDirective)); 5387 } 5388 if (!trySkipToken(AsmToken::Comma)) { 5389 return TokError(Twine("expected an even number of values in ") + 5390 Twine(PALMD::AssemblerDirective)); 5391 } 5392 if (ParseAsAbsoluteExpression(Value)) { 5393 return TokError(Twine("invalid value in ") + 5394 Twine(PALMD::AssemblerDirective)); 5395 } 5396 PALMetadata->setRegister(Key, Value); 5397 if (!trySkipToken(AsmToken::Comma)) 5398 break; 5399 } 5400 return false; 5401 } 5402 5403 /// ParseDirectiveAMDGPULDS 5404 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5405 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5406 if (getParser().checkForValidSection()) 5407 return true; 5408 5409 StringRef Name; 5410 SMLoc NameLoc = getLoc(); 5411 if (getParser().parseIdentifier(Name)) 5412 return TokError("expected identifier in directive"); 5413 5414 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5415 if (parseToken(AsmToken::Comma, "expected ','")) 5416 return true; 5417 5418 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5419 5420 int64_t Size; 5421 SMLoc SizeLoc = getLoc(); 5422 if (getParser().parseAbsoluteExpression(Size)) 5423 return true; 5424 if (Size < 0) 5425 return Error(SizeLoc, "size must be non-negative"); 5426 if (Size > LocalMemorySize) 5427 return Error(SizeLoc, "size is too large"); 5428 5429 int64_t Alignment = 4; 5430 if (trySkipToken(AsmToken::Comma)) { 5431 SMLoc AlignLoc = getLoc(); 5432 if 
(getParser().parseAbsoluteExpression(Alignment)) 5433 return true; 5434 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5435 return Error(AlignLoc, "alignment must be a power of two"); 5436 5437 // Alignment larger than the size of LDS is possible in theory, as long 5438 // as the linker manages to place to symbol at address 0, but we do want 5439 // to make sure the alignment fits nicely into a 32-bit integer. 5440 if (Alignment >= 1u << 31) 5441 return Error(AlignLoc, "alignment is too large"); 5442 } 5443 5444 if (parseToken(AsmToken::EndOfStatement, 5445 "unexpected token in '.amdgpu_lds' directive")) 5446 return true; 5447 5448 Symbol->redefineIfPossible(); 5449 if (!Symbol->isUndefined()) 5450 return Error(NameLoc, "invalid symbol redefinition"); 5451 5452 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5453 return false; 5454 } 5455 5456 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5457 StringRef IDVal = DirectiveID.getString(); 5458 5459 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5460 if (IDVal == ".amdhsa_kernel") 5461 return ParseDirectiveAMDHSAKernel(); 5462 5463 // TODO: Restructure/combine with PAL metadata directive. 5464 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5465 return ParseDirectiveHSAMetadata(); 5466 } else { 5467 if (IDVal == ".hsa_code_object_version") 5468 return ParseDirectiveHSACodeObjectVersion(); 5469 5470 if (IDVal == ".hsa_code_object_isa") 5471 return ParseDirectiveHSACodeObjectISA(); 5472 5473 if (IDVal == ".amd_kernel_code_t") 5474 return ParseDirectiveAMDKernelCodeT(); 5475 5476 if (IDVal == ".amdgpu_hsa_kernel") 5477 return ParseDirectiveAMDGPUHsaKernel(); 5478 5479 if (IDVal == ".amd_amdgpu_isa") 5480 return ParseDirectiveISAVersion(); 5481 5482 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5483 return ParseDirectiveHSAMetadata(); 5484 } 5485 5486 if (IDVal == ".amdgcn_target") 5487 return ParseDirectiveAMDGCNTarget(); 5488 5489 if (IDVal == ".amdgpu_lds") 5490 return ParseDirectiveAMDGPULDS(); 5491 5492 if (IDVal == PALMD::AssemblerDirectiveBegin) 5493 return ParseDirectivePALMetadataBegin(); 5494 5495 if (IDVal == PALMD::AssemblerDirective) 5496 return ParseDirectivePALMetadata(); 5497 5498 return true; 5499 } 5500 5501 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5502 unsigned RegNo) { 5503 5504 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5505 return isGFX9Plus(); 5506 5507 // GFX10 has 2 more SGPRs 104 and 105. 5508 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5509 return hasSGPR104_SGPR105(); 5510 5511 switch (RegNo) { 5512 case AMDGPU::SRC_SHARED_BASE: 5513 case AMDGPU::SRC_SHARED_LIMIT: 5514 case AMDGPU::SRC_PRIVATE_BASE: 5515 case AMDGPU::SRC_PRIVATE_LIMIT: 5516 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5517 return isGFX9Plus(); 5518 case AMDGPU::TBA: 5519 case AMDGPU::TBA_LO: 5520 case AMDGPU::TBA_HI: 5521 case AMDGPU::TMA: 5522 case AMDGPU::TMA_LO: 5523 case AMDGPU::TMA_HI: 5524 return !isGFX9Plus(); 5525 case AMDGPU::XNACK_MASK: 5526 case AMDGPU::XNACK_MASK_LO: 5527 case AMDGPU::XNACK_MASK_HI: 5528 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5529 case AMDGPU::SGPR_NULL: 5530 return isGFX10Plus(); 5531 default: 5532 break; 5533 } 5534 5535 if (isCI()) 5536 return true; 5537 5538 if (isSI() || isGFX10Plus()) { 5539 // No flat_scr on SI. 5540 // On GFX10 flat scratch is not a valid register operand and can only be 5541 // accessed with s_setreg/s_getreg. 
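    // Illustrative consequence (not taken from a real test): on GFX10
    //   s_mov_b32 flat_scratch_lo, s0
    // is rejected here, while the same state is assumed to remain reachable via
    //   s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s0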
5542 switch (RegNo) { 5543 case AMDGPU::FLAT_SCR: 5544 case AMDGPU::FLAT_SCR_LO: 5545 case AMDGPU::FLAT_SCR_HI: 5546 return false; 5547 default: 5548 return true; 5549 } 5550 } 5551 5552 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5553 // SI/CI have. 5554 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5555 return hasSGPR102_SGPR103(); 5556 5557 return true; 5558 } 5559 5560 OperandMatchResultTy 5561 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5562 OperandMode Mode) { 5563 // Try to parse with a custom parser 5564 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5565 5566 // If we successfully parsed the operand or if there as an error parsing, 5567 // we are done. 5568 // 5569 // If we are parsing after we reach EndOfStatement then this means we 5570 // are appending default values to the Operands list. This is only done 5571 // by custom parser, so we shouldn't continue on to the generic parsing. 5572 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5573 isToken(AsmToken::EndOfStatement)) 5574 return ResTy; 5575 5576 SMLoc RBraceLoc; 5577 SMLoc LBraceLoc = getLoc(); 5578 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5579 unsigned Prefix = Operands.size(); 5580 5581 for (;;) { 5582 auto Loc = getLoc(); 5583 ResTy = parseReg(Operands); 5584 if (ResTy == MatchOperand_NoMatch) 5585 Error(Loc, "expected a register"); 5586 if (ResTy != MatchOperand_Success) 5587 return MatchOperand_ParseFail; 5588 5589 RBraceLoc = getLoc(); 5590 if (trySkipToken(AsmToken::RBrac)) 5591 break; 5592 5593 if (!skipToken(AsmToken::Comma, 5594 "expected a comma or a closing square bracket")) { 5595 return MatchOperand_ParseFail; 5596 } 5597 } 5598 5599 if (Operands.size() - Prefix > 1) { 5600 Operands.insert(Operands.begin() + Prefix, 5601 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5602 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5603 } 5604 5605 return MatchOperand_Success; 5606 } 5607 5608 return parseRegOrImm(Operands); 5609 } 5610 5611 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5612 // Clear any forced encodings from the previous instruction. 5613 setForcedEncodingSize(0); 5614 setForcedDPP(false); 5615 setForcedSDWA(false); 5616 5617 if (Name.endswith("_e64")) { 5618 setForcedEncodingSize(64); 5619 return Name.substr(0, Name.size() - 4); 5620 } else if (Name.endswith("_e32")) { 5621 setForcedEncodingSize(32); 5622 return Name.substr(0, Name.size() - 4); 5623 } else if (Name.endswith("_dpp")) { 5624 setForcedDPP(true); 5625 return Name.substr(0, Name.size() - 4); 5626 } else if (Name.endswith("_sdwa")) { 5627 setForcedSDWA(true); 5628 return Name.substr(0, Name.size() - 5); 5629 } 5630 return Name; 5631 } 5632 5633 static void applyMnemonicAliases(StringRef &Mnemonic, 5634 const FeatureBitset &Features, 5635 unsigned VariantID); 5636 5637 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5638 StringRef Name, 5639 SMLoc NameLoc, OperandVector &Operands) { 5640 // Add the instruction mnemonic 5641 Name = parseMnemonicSuffix(Name); 5642 5643 // If the target architecture uses MnemonicAlias, call it here to parse 5644 // operands correctly. 
5645 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5646 5647 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5648 5649 bool IsMIMG = Name.startswith("image_"); 5650 5651 while (!trySkipToken(AsmToken::EndOfStatement)) { 5652 OperandMode Mode = OperandMode_Default; 5653 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5654 Mode = OperandMode_NSA; 5655 CPolSeen = 0; 5656 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5657 5658 if (Res != MatchOperand_Success) { 5659 checkUnsupportedInstruction(Name, NameLoc); 5660 if (!Parser.hasPendingError()) { 5661 // FIXME: use real operand location rather than the current location. 5662 StringRef Msg = 5663 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5664 "not a valid operand."; 5665 Error(getLoc(), Msg); 5666 } 5667 while (!trySkipToken(AsmToken::EndOfStatement)) { 5668 lex(); 5669 } 5670 return true; 5671 } 5672 5673 // Eat the comma or space if there is one. 5674 trySkipToken(AsmToken::Comma); 5675 } 5676 5677 return false; 5678 } 5679 5680 //===----------------------------------------------------------------------===// 5681 // Utility functions 5682 //===----------------------------------------------------------------------===// 5683 5684 OperandMatchResultTy 5685 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5686 5687 if (!trySkipId(Prefix, AsmToken::Colon)) 5688 return MatchOperand_NoMatch; 5689 5690 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5691 } 5692 5693 OperandMatchResultTy 5694 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5695 AMDGPUOperand::ImmTy ImmTy, 5696 bool (*ConvertResult)(int64_t&)) { 5697 SMLoc S = getLoc(); 5698 int64_t Value = 0; 5699 5700 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5701 if (Res != MatchOperand_Success) 5702 return Res; 5703 5704 if (ConvertResult && !ConvertResult(Value)) { 5705 Error(S, "invalid " + StringRef(Prefix) + " value."); 5706 } 5707 5708 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5709 return MatchOperand_Success; 5710 } 5711 5712 OperandMatchResultTy 5713 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5714 OperandVector &Operands, 5715 AMDGPUOperand::ImmTy ImmTy, 5716 bool (*ConvertResult)(int64_t&)) { 5717 SMLoc S = getLoc(); 5718 if (!trySkipId(Prefix, AsmToken::Colon)) 5719 return MatchOperand_NoMatch; 5720 5721 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5722 return MatchOperand_ParseFail; 5723 5724 unsigned Val = 0; 5725 const unsigned MaxSize = 4; 5726 5727 // FIXME: How to verify the number of elements matches the number of src 5728 // operands? 
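  // Typical operands handled here (illustrative): op_sel:[0,1] or neg_lo:[1,0,0].
  // Each element must be 0 or 1 and is packed into bit I of Val below.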
5729 for (int I = 0; ; ++I) { 5730 int64_t Op; 5731 SMLoc Loc = getLoc(); 5732 if (!parseExpr(Op)) 5733 return MatchOperand_ParseFail; 5734 5735 if (Op != 0 && Op != 1) { 5736 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5737 return MatchOperand_ParseFail; 5738 } 5739 5740 Val |= (Op << I); 5741 5742 if (trySkipToken(AsmToken::RBrac)) 5743 break; 5744 5745 if (I + 1 == MaxSize) { 5746 Error(getLoc(), "expected a closing square bracket"); 5747 return MatchOperand_ParseFail; 5748 } 5749 5750 if (!skipToken(AsmToken::Comma, "expected a comma")) 5751 return MatchOperand_ParseFail; 5752 } 5753 5754 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5755 return MatchOperand_Success; 5756 } 5757 5758 OperandMatchResultTy 5759 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5760 AMDGPUOperand::ImmTy ImmTy) { 5761 int64_t Bit; 5762 SMLoc S = getLoc(); 5763 5764 if (trySkipId(Name)) { 5765 Bit = 1; 5766 } else if (trySkipId("no", Name)) { 5767 Bit = 0; 5768 } else { 5769 return MatchOperand_NoMatch; 5770 } 5771 5772 if (Name == "r128" && !hasMIMG_R128()) { 5773 Error(S, "r128 modifier is not supported on this GPU"); 5774 return MatchOperand_ParseFail; 5775 } 5776 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5777 Error(S, "a16 modifier is not supported on this GPU"); 5778 return MatchOperand_ParseFail; 5779 } 5780 5781 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5782 ImmTy = AMDGPUOperand::ImmTyR128A16; 5783 5784 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5785 return MatchOperand_Success; 5786 } 5787 5788 OperandMatchResultTy 5789 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5790 unsigned CPolOn = 0; 5791 unsigned CPolOff = 0; 5792 SMLoc S = getLoc(); 5793 5794 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5795 if (isGFX940() && !Mnemo.startswith("s_")) { 5796 if (trySkipId("sc0")) 5797 CPolOn = AMDGPU::CPol::SC0; 5798 else if (trySkipId("nosc0")) 5799 CPolOff = AMDGPU::CPol::SC0; 5800 else if (trySkipId("nt")) 5801 CPolOn = AMDGPU::CPol::NT; 5802 else if (trySkipId("nont")) 5803 CPolOff = AMDGPU::CPol::NT; 5804 else if (trySkipId("sc1")) 5805 CPolOn = AMDGPU::CPol::SC1; 5806 else if (trySkipId("nosc1")) 5807 CPolOff = AMDGPU::CPol::SC1; 5808 else 5809 return MatchOperand_NoMatch; 5810 } 5811 else if (trySkipId("glc")) 5812 CPolOn = AMDGPU::CPol::GLC; 5813 else if (trySkipId("noglc")) 5814 CPolOff = AMDGPU::CPol::GLC; 5815 else if (trySkipId("slc")) 5816 CPolOn = AMDGPU::CPol::SLC; 5817 else if (trySkipId("noslc")) 5818 CPolOff = AMDGPU::CPol::SLC; 5819 else if (trySkipId("dlc")) 5820 CPolOn = AMDGPU::CPol::DLC; 5821 else if (trySkipId("nodlc")) 5822 CPolOff = AMDGPU::CPol::DLC; 5823 else if (trySkipId("scc")) 5824 CPolOn = AMDGPU::CPol::SCC; 5825 else if (trySkipId("noscc")) 5826 CPolOff = AMDGPU::CPol::SCC; 5827 else 5828 return MatchOperand_NoMatch; 5829 5830 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5831 Error(S, "dlc modifier is not supported on this GPU"); 5832 return MatchOperand_ParseFail; 5833 } 5834 5835 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5836 Error(S, "scc modifier is not supported on this GPU"); 5837 return MatchOperand_ParseFail; 5838 } 5839 5840 if (CPolSeen & (CPolOn | CPolOff)) { 5841 Error(S, "duplicate cache policy modifier"); 5842 return MatchOperand_ParseFail; 5843 } 5844 5845 CPolSeen |= (CPolOn | CPolOff); 5846 5847 for (unsigned I = 1; I != Operands.size(); ++I) { 5848 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5849 if (Op.isCPol()) { 5850 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5851 return MatchOperand_Success; 5852 } 5853 } 5854 5855 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5856 AMDGPUOperand::ImmTyCPol)); 5857 5858 return MatchOperand_Success; 5859 } 5860 5861 static void addOptionalImmOperand( 5862 MCInst& Inst, const OperandVector& Operands, 5863 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5864 AMDGPUOperand::ImmTy ImmT, 5865 int64_t Default = 0) { 5866 auto i = OptionalIdx.find(ImmT); 5867 if (i != OptionalIdx.end()) { 5868 unsigned Idx = i->second; 5869 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5870 } else { 5871 Inst.addOperand(MCOperand::createImm(Default)); 5872 } 5873 } 5874 5875 OperandMatchResultTy 5876 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5877 StringRef &Value, 5878 SMLoc &StringLoc) { 5879 if (!trySkipId(Prefix, AsmToken::Colon)) 5880 return MatchOperand_NoMatch; 5881 5882 StringLoc = getLoc(); 5883 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5884 : MatchOperand_ParseFail; 5885 } 5886 5887 //===----------------------------------------------------------------------===// 5888 // MTBUF format 5889 //===----------------------------------------------------------------------===// 5890 5891 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5892 int64_t MaxVal, 5893 int64_t &Fmt) { 5894 int64_t Val; 5895 SMLoc Loc = getLoc(); 5896 5897 auto Res = parseIntWithPrefix(Pref, Val); 5898 if (Res == MatchOperand_ParseFail) 5899 return false; 5900 if (Res == MatchOperand_NoMatch) 5901 return true; 5902 5903 if (Val < 0 || Val > MaxVal) { 5904 Error(Loc, Twine("out of range ", StringRef(Pref))); 5905 return false; 5906 } 5907 5908 Fmt = Val; 5909 return true; 5910 } 5911 5912 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5913 // values to live in a joint format operand in the MCInst encoding. 5914 OperandMatchResultTy 5915 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5916 using namespace llvm::AMDGPU::MTBUFFormat; 5917 5918 int64_t Dfmt = DFMT_UNDEF; 5919 int64_t Nfmt = NFMT_UNDEF; 5920 5921 // dfmt and nfmt can appear in either order, and each is optional. 5922 for (int I = 0; I < 2; ++I) { 5923 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5924 return MatchOperand_ParseFail; 5925 5926 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5927 return MatchOperand_ParseFail; 5928 } 5929 // Skip optional comma between dfmt/nfmt 5930 // but guard against 2 commas following each other. 5931 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5932 !peekToken().is(AsmToken::Comma)) { 5933 trySkipToken(AsmToken::Comma); 5934 } 5935 } 5936 5937 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5938 return MatchOperand_NoMatch; 5939 5940 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5941 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5942 5943 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5944 return MatchOperand_Success; 5945 } 5946 5947 OperandMatchResultTy 5948 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5949 using namespace llvm::AMDGPU::MTBUFFormat; 5950 5951 int64_t Fmt = UFMT_UNDEF; 5952 5953 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5954 return MatchOperand_ParseFail; 5955 5956 if (Fmt == UFMT_UNDEF) 5957 return MatchOperand_NoMatch; 5958 5959 Format = Fmt; 5960 return MatchOperand_Success; 5961 } 5962 5963 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5964 int64_t &Nfmt, 5965 StringRef FormatStr, 5966 SMLoc Loc) { 5967 using namespace llvm::AMDGPU::MTBUFFormat; 5968 int64_t Format; 5969 5970 Format = getDfmt(FormatStr); 5971 if (Format != DFMT_UNDEF) { 5972 Dfmt = Format; 5973 return true; 5974 } 5975 5976 Format = getNfmt(FormatStr, getSTI()); 5977 if (Format != NFMT_UNDEF) { 5978 Nfmt = Format; 5979 return true; 5980 } 5981 5982 Error(Loc, "unsupported format"); 5983 return false; 5984 } 5985 5986 OperandMatchResultTy 5987 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5988 SMLoc FormatLoc, 5989 int64_t &Format) { 5990 using namespace llvm::AMDGPU::MTBUFFormat; 5991 5992 int64_t Dfmt = DFMT_UNDEF; 5993 int64_t Nfmt = NFMT_UNDEF; 5994 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5995 return MatchOperand_ParseFail; 5996 5997 if (trySkipToken(AsmToken::Comma)) { 5998 StringRef Str; 5999 SMLoc Loc = getLoc(); 6000 if (!parseId(Str, "expected a format string") || 6001 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6002 return MatchOperand_ParseFail; 6003 } 6004 if (Dfmt == DFMT_UNDEF) { 6005 Error(Loc, "duplicate numeric format"); 6006 return MatchOperand_ParseFail; 6007 } else if (Nfmt == NFMT_UNDEF) { 6008 Error(Loc, "duplicate data format"); 6009 return MatchOperand_ParseFail; 6010 } 6011 } 6012 6013 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6014 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6015 6016 if (isGFX10Plus()) { 6017 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6018 if (Ufmt == UFMT_UNDEF) { 6019 Error(FormatLoc, "unsupported format"); 6020 return MatchOperand_ParseFail; 6021 } 6022 Format = Ufmt; 6023 } else { 6024 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6025 } 6026 6027 return MatchOperand_Success; 6028 } 6029 6030 OperandMatchResultTy 6031 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6032 SMLoc Loc, 6033 int64_t &Format) { 6034 using namespace llvm::AMDGPU::MTBUFFormat; 6035 6036 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6037 if (Id == UFMT_UNDEF) 6038 return MatchOperand_NoMatch; 6039 6040 if (!isGFX10Plus()) { 6041 Error(Loc, "unified format is not supported on this GPU"); 6042 return MatchOperand_ParseFail; 6043 } 6044 6045 Format = Id; 6046 return MatchOperand_Success; 6047 } 6048 6049 OperandMatchResultTy 6050 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6051 using namespace llvm::AMDGPU::MTBUFFormat; 6052 SMLoc Loc = getLoc(); 6053 6054 if (!parseExpr(Format)) 6055 return MatchOperand_ParseFail; 6056 if (!isValidFormatEncoding(Format, getSTI())) { 6057 Error(Loc, "out of range format"); 6058 return MatchOperand_ParseFail; 6059 } 6060 6061 return MatchOperand_Success; 6062 } 6063 6064 OperandMatchResultTy 6065 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6066 using namespace llvm::AMDGPU::MTBUFFormat; 6067 6068 if (!trySkipId("format", AsmToken::Colon)) 6069 return MatchOperand_NoMatch; 6070 6071 if (trySkipToken(AsmToken::LBrac)) { 6072 StringRef FormatStr; 6073 SMLoc Loc = getLoc(); 6074 if (!parseId(FormatStr, "expected a format string")) 6075 return MatchOperand_ParseFail; 6076 6077 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6078 if (Res == MatchOperand_NoMatch) 6079 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6080 if (Res != MatchOperand_Success) 6081 return Res; 6082 6083 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6084 return MatchOperand_ParseFail; 6085 6086 return MatchOperand_Success; 6087 } 6088 6089 return parseNumericFormat(Format); 6090 } 6091 6092 OperandMatchResultTy 6093 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6094 using namespace llvm::AMDGPU::MTBUFFormat; 6095 6096 int64_t Format = getDefaultFormatEncoding(getSTI()); 6097 OperandMatchResultTy Res; 6098 SMLoc Loc = getLoc(); 6099 6100 // Parse legacy format syntax. 6101 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6102 if (Res == MatchOperand_ParseFail) 6103 return Res; 6104 6105 bool FormatFound = (Res == MatchOperand_Success); 6106 6107 Operands.push_back( 6108 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6109 6110 if (FormatFound) 6111 trySkipToken(AsmToken::Comma); 6112 6113 if (isToken(AsmToken::EndOfStatement)) { 6114 // We are expecting an soffset operand, 6115 // but let matcher handle the error. 6116 return MatchOperand_Success; 6117 } 6118 6119 // Parse soffset. 
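  // If no legacy dfmt/nfmt or unified format was found above, a symbolic or
  // numeric format may still follow soffset, e.g. (illustrative operand tails,
  // instruction prefix elided):
  //   ..., s1 format:[BUF_DATA_FORMAT_32, BUF_NUM_FORMAT_FLOAT]
  //   ..., s1 format:22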
6120 Res = parseRegOrImm(Operands); 6121 if (Res != MatchOperand_Success) 6122 return Res; 6123 6124 trySkipToken(AsmToken::Comma); 6125 6126 if (!FormatFound) { 6127 Res = parseSymbolicOrNumericFormat(Format); 6128 if (Res == MatchOperand_ParseFail) 6129 return Res; 6130 if (Res == MatchOperand_Success) { 6131 auto Size = Operands.size(); 6132 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6133 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6134 Op.setImm(Format); 6135 } 6136 return MatchOperand_Success; 6137 } 6138 6139 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6140 Error(getLoc(), "duplicate format"); 6141 return MatchOperand_ParseFail; 6142 } 6143 return MatchOperand_Success; 6144 } 6145 6146 //===----------------------------------------------------------------------===// 6147 // ds 6148 //===----------------------------------------------------------------------===// 6149 6150 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6151 const OperandVector &Operands) { 6152 OptionalImmIndexMap OptionalIdx; 6153 6154 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6155 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6156 6157 // Add the register arguments 6158 if (Op.isReg()) { 6159 Op.addRegOperands(Inst, 1); 6160 continue; 6161 } 6162 6163 // Handle optional arguments 6164 OptionalIdx[Op.getImmTy()] = i; 6165 } 6166 6167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6168 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6169 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6170 6171 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6172 } 6173 6174 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6175 bool IsGdsHardcoded) { 6176 OptionalImmIndexMap OptionalIdx; 6177 6178 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6179 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6180 6181 // Add the register arguments 6182 if (Op.isReg()) { 6183 Op.addRegOperands(Inst, 1); 6184 continue; 6185 } 6186 6187 if (Op.isToken() && Op.getToken() == "gds") { 6188 IsGdsHardcoded = true; 6189 continue; 6190 } 6191 6192 // Handle optional arguments 6193 OptionalIdx[Op.getImmTy()] = i; 6194 } 6195 6196 AMDGPUOperand::ImmTy OffsetType = 6197 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6198 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6199 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6200 AMDGPUOperand::ImmTyOffset; 6201 6202 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6203 6204 if (!IsGdsHardcoded) { 6205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6206 } 6207 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6208 } 6209 6210 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6211 OptionalImmIndexMap OptionalIdx; 6212 6213 unsigned OperandIdx[4]; 6214 unsigned EnMask = 0; 6215 int SrcIdx = 0; 6216 6217 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6218 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6219 6220 // Add the register arguments 6221 if (Op.isReg()) { 6222 assert(SrcIdx < 4); 6223 OperandIdx[SrcIdx] = Inst.size(); 6224 Op.addRegOperands(Inst, 1); 6225 ++SrcIdx; 6226 continue; 6227 } 6228 6229 if (Op.isOff()) { 6230 assert(SrcIdx < 4); 6231 OperandIdx[SrcIdx] = Inst.size(); 6232 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6233 ++SrcIdx; 6234 continue; 6235 } 6236 6237 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6238 Op.addImmOperands(Inst, 1); 6239 continue; 6240 } 6241 6242 if (Op.isToken() && Op.getToken() == "done") 6243 continue; 6244 6245 // Handle optional arguments 6246 OptionalIdx[Op.getImmTy()] = i; 6247 } 6248 6249 assert(SrcIdx == 4); 6250 6251 bool Compr = false; 6252 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6253 Compr = true; 6254 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6255 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6256 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6257 } 6258 6259 for (auto i = 0; i < SrcIdx; ++i) { 6260 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6261 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6262 } 6263 } 6264 6265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6267 6268 Inst.addOperand(MCOperand::createImm(EnMask)); 6269 } 6270 6271 //===----------------------------------------------------------------------===// 6272 // s_waitcnt 6273 //===----------------------------------------------------------------------===// 6274 6275 static bool 6276 encodeCnt( 6277 const AMDGPU::IsaVersion ISA, 6278 int64_t &IntVal, 6279 int64_t CntVal, 6280 bool Saturate, 6281 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6282 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6283 { 6284 bool Failed = false; 6285 6286 IntVal = encode(ISA, IntVal, CntVal); 6287 if (CntVal != decode(ISA, IntVal)) { 6288 if (Saturate) { 6289 IntVal = encode(ISA, IntVal, -1); 6290 } else { 6291 Failed = true; 6292 } 6293 } 6294 return Failed; 6295 } 6296 6297 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6298 6299 SMLoc CntLoc = getLoc(); 6300 StringRef CntName = getTokenStr(); 6301 6302 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6303 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6304 return false; 6305 6306 int64_t CntVal; 6307 SMLoc ValLoc = getLoc(); 6308 if (!parseExpr(CntVal)) 6309 return false; 6310 6311 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6312 6313 bool Failed = true; 6314 bool Sat = CntName.endswith("_sat"); 6315 6316 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6317 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6318 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6319 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6320 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6321 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6322 } else { 6323 Error(CntLoc, "invalid counter name " + CntName); 6324 return false; 6325 } 6326 6327 if (Failed) { 6328 Error(ValLoc, "too large value for " + CntName); 6329 return false; 6330 } 6331 6332 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6333 return false; 6334 6335 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6336 if (isToken(AsmToken::EndOfStatement)) { 6337 Error(getLoc(), "expected a counter name"); 6338 return false; 6339 } 6340 } 6341 6342 return true; 6343 } 6344 6345 OperandMatchResultTy 6346 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6347 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6348 int64_t Waitcnt = getWaitcntBitMask(ISA); 6349 SMLoc S = getLoc(); 6350 6351 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6352 while (!isToken(AsmToken::EndOfStatement)) { 6353 if (!parseCnt(Waitcnt)) 6354 return MatchOperand_ParseFail; 6355 } 6356 } else { 6357 if (!parseExpr(Waitcnt)) 6358 return MatchOperand_ParseFail; 6359 } 6360 6361 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6362 return MatchOperand_Success; 6363 } 6364 6365 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6366 SMLoc FieldLoc = getLoc(); 6367 StringRef FieldName = getTokenStr(); 6368 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6369 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6370 return false; 6371 6372 SMLoc ValueLoc = getLoc(); 6373 StringRef ValueName = getTokenStr(); 6374 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6375 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6376 return false; 6377 6378 unsigned Shift; 6379 if (FieldName == "instid0") { 6380 Shift = 0; 6381 } else if (FieldName == "instskip") { 6382 Shift = 4; 6383 } else if (FieldName == "instid1") { 6384 Shift = 7; 6385 } else { 6386 Error(FieldLoc, "invalid field name " + FieldName); 6387 return false; 6388 } 6389 6390 int Value; 6391 if (Shift == 4) { 6392 // Parse values for instskip. 6393 Value = StringSwitch<int>(ValueName) 6394 .Case("SAME", 0) 6395 .Case("NEXT", 1) 6396 .Case("SKIP_1", 2) 6397 .Case("SKIP_2", 3) 6398 .Case("SKIP_3", 4) 6399 .Case("SKIP_4", 5) 6400 .Default(-1); 6401 } else { 6402 // Parse values for instid0 and instid1. 6403 Value = StringSwitch<int>(ValueName) 6404 .Case("NO_DEP", 0) 6405 .Case("VALU_DEP_1", 1) 6406 .Case("VALU_DEP_2", 2) 6407 .Case("VALU_DEP_3", 3) 6408 .Case("VALU_DEP_4", 4) 6409 .Case("TRANS32_DEP_1", 5) 6410 .Case("TRANS32_DEP_2", 6) 6411 .Case("TRANS32_DEP_3", 7) 6412 .Case("FMA_ACCUM_CYCLE_1", 8) 6413 .Case("SALU_CYCLE_1", 9) 6414 .Case("SALU_CYCLE_2", 10) 6415 .Case("SALU_CYCLE_3", 11) 6416 .Default(-1); 6417 } 6418 if (Value < 0) { 6419 Error(ValueLoc, "invalid value name " + ValueName); 6420 return false; 6421 } 6422 6423 Delay |= Value << Shift; 6424 return true; 6425 } 6426 6427 OperandMatchResultTy 6428 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6429 int64_t Delay = 0; 6430 SMLoc S = getLoc(); 6431 6432 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6433 do { 6434 if (!parseDelay(Delay)) 6435 return MatchOperand_ParseFail; 6436 } while (trySkipToken(AsmToken::Pipe)); 6437 } else { 6438 if (!parseExpr(Delay)) 6439 return MatchOperand_ParseFail; 6440 } 6441 6442 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6443 return MatchOperand_Success; 6444 } 6445 6446 bool 6447 AMDGPUOperand::isSWaitCnt() const { 6448 return isImm(); 6449 } 6450 6451 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6452 6453 //===----------------------------------------------------------------------===// 6454 // DepCtr 6455 //===----------------------------------------------------------------------===// 6456 6457 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6458 StringRef DepCtrName) { 6459 switch (ErrorId) { 6460 case OPR_ID_UNKNOWN: 6461 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6462 return; 6463 case OPR_ID_UNSUPPORTED: 6464 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6465 return; 6466 case OPR_ID_DUPLICATE: 6467 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6468 return; 6469 case OPR_VAL_INVALID: 6470 Error(Loc, Twine("invalid value for ", DepCtrName)); 6471 return; 6472 default: 6473 assert(false); 6474 } 6475 } 6476 6477 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6478 6479 using namespace llvm::AMDGPU::DepCtr; 6480 6481 SMLoc DepCtrLoc = getLoc(); 6482 StringRef DepCtrName = getTokenStr(); 6483 6484 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6485 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6486 return false; 6487 6488 int64_t ExprVal; 6489 if (!parseExpr(ExprVal)) 6490 return false; 6491 6492 unsigned PrevOprMask = UsedOprMask; 6493 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6494 6495 if (CntVal < 0) { 6496 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6497 return false; 6498 } 6499 6500 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6501 return false; 6502 6503 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6504 if (isToken(AsmToken::EndOfStatement)) { 6505 Error(getLoc(), "expected a counter name"); 6506 return false; 6507 } 6508 } 6509 6510 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6511 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6512 return true; 6513 } 6514 6515 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6516 using namespace llvm::AMDGPU::DepCtr; 6517 6518 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6519 SMLoc Loc = getLoc(); 6520 6521 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6522 unsigned UsedOprMask = 0; 6523 while (!isToken(AsmToken::EndOfStatement)) { 6524 if (!parseDepCtr(DepCtr, UsedOprMask)) 6525 return MatchOperand_ParseFail; 6526 } 6527 } else { 6528 if (!parseExpr(DepCtr)) 6529 return MatchOperand_ParseFail; 6530 } 6531 6532 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6533 return MatchOperand_Success; 6534 } 6535 6536 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6537 6538 //===----------------------------------------------------------------------===// 6539 // hwreg 6540 //===----------------------------------------------------------------------===// 6541 6542 bool 6543 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6544 OperandInfoTy &Offset, 6545 OperandInfoTy &Width) { 6546 using namespace llvm::AMDGPU::Hwreg; 6547 6548 // The register may be specified by name or using a numeric code 6549 HwReg.Loc = getLoc(); 6550 if (isToken(AsmToken::Identifier) && 6551 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6552 HwReg.IsSymbolic = true; 6553 lex(); // skip register name 6554 } else if (!parseExpr(HwReg.Id, "a register name")) { 6555 return false; 6556 } 6557 6558 if (trySkipToken(AsmToken::RParen)) 6559 return true; 6560 6561 // parse optional params 6562 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6563 return false; 6564 6565 Offset.Loc = getLoc(); 6566 if (!parseExpr(Offset.Id)) 6567 return false; 6568 6569 if (!skipToken(AsmToken::Comma, "expected a comma")) 6570 return false; 6571 6572 Width.Loc = getLoc(); 6573 return parseExpr(Width.Id) && 6574 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6575 } 6576 6577 bool 6578 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6579 const OperandInfoTy &Offset, 6580 const OperandInfoTy &Width) { 6581 6582 using namespace llvm::AMDGPU::Hwreg; 6583 6584 if (HwReg.IsSymbolic) { 6585 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6586 Error(HwReg.Loc, 6587 "specified hardware register is not supported on this GPU"); 6588 return false; 6589 } 6590 } else { 6591 if (!isValidHwreg(HwReg.Id)) { 6592 Error(HwReg.Loc, 6593 "invalid code of hardware register: only 6-bit values are legal"); 6594 return false; 6595 } 6596 } 6597 if (!isValidHwregOffset(Offset.Id)) { 6598 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6599 return false; 6600 } 6601 if (!isValidHwregWidth(Width.Id)) { 6602 Error(Width.Loc, 6603 "invalid bitfield width: only values from 1 to 32 are legal"); 6604 return false; 6605 } 6606 return true; 6607 } 6608 6609 OperandMatchResultTy 6610 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6611 using namespace llvm::AMDGPU::Hwreg; 6612 6613 int64_t ImmVal = 0; 6614 SMLoc Loc = getLoc(); 6615 6616 if (trySkipId("hwreg", AsmToken::LParen)) { 6617 
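    // Symbolic form handled by this branch (illustrative):
    //   s_getreg_b32 s2, hwreg(HW_REG_TRAPSTS, 3, 7)
    // The fallback below accepts an equivalent pre-encoded 16-bit immediate.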
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6618 OperandInfoTy Offset(OFFSET_DEFAULT_); 6619 OperandInfoTy Width(WIDTH_DEFAULT_); 6620 if (parseHwregBody(HwReg, Offset, Width) && 6621 validateHwreg(HwReg, Offset, Width)) { 6622 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6623 } else { 6624 return MatchOperand_ParseFail; 6625 } 6626 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6627 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6628 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6629 return MatchOperand_ParseFail; 6630 } 6631 } else { 6632 return MatchOperand_ParseFail; 6633 } 6634 6635 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6636 return MatchOperand_Success; 6637 } 6638 6639 bool AMDGPUOperand::isHwreg() const { 6640 return isImmTy(ImmTyHwreg); 6641 } 6642 6643 //===----------------------------------------------------------------------===// 6644 // sendmsg 6645 //===----------------------------------------------------------------------===// 6646 6647 bool 6648 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6649 OperandInfoTy &Op, 6650 OperandInfoTy &Stream) { 6651 using namespace llvm::AMDGPU::SendMsg; 6652 6653 Msg.Loc = getLoc(); 6654 if (isToken(AsmToken::Identifier) && 6655 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6656 Msg.IsSymbolic = true; 6657 lex(); // skip message name 6658 } else if (!parseExpr(Msg.Id, "a message name")) { 6659 return false; 6660 } 6661 6662 if (trySkipToken(AsmToken::Comma)) { 6663 Op.IsDefined = true; 6664 Op.Loc = getLoc(); 6665 if (isToken(AsmToken::Identifier) && 6666 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6667 lex(); // skip operation name 6668 } else if (!parseExpr(Op.Id, "an operation name")) { 6669 return false; 6670 } 6671 6672 if (trySkipToken(AsmToken::Comma)) { 6673 Stream.IsDefined = true; 6674 Stream.Loc = getLoc(); 6675 if (!parseExpr(Stream.Id)) 6676 return false; 6677 } 6678 } 6679 6680 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6681 } 6682 6683 bool 6684 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6685 const OperandInfoTy &Op, 6686 const OperandInfoTy &Stream) { 6687 using namespace llvm::AMDGPU::SendMsg; 6688 6689 // Validation strictness depends on whether message is specified 6690 // in a symbolic or in a numeric form. In the latter case 6691 // only encoding possibility is checked. 
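  // For example (illustrative), both spellings below produce the same encoding,
  // but only the first, symbolic one is subject to the stricter checks:
  //   s_sendmsg sendmsg(MSG_GS_DONE, GS_OP_NOP)
  //   s_sendmsg 3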
6692 bool Strict = Msg.IsSymbolic; 6693 6694 if (Strict) { 6695 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6696 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6697 return false; 6698 } 6699 } else { 6700 if (!isValidMsgId(Msg.Id, getSTI())) { 6701 Error(Msg.Loc, "invalid message id"); 6702 return false; 6703 } 6704 } 6705 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6706 if (Op.IsDefined) { 6707 Error(Op.Loc, "message does not support operations"); 6708 } else { 6709 Error(Msg.Loc, "missing message operation"); 6710 } 6711 return false; 6712 } 6713 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6714 Error(Op.Loc, "invalid operation id"); 6715 return false; 6716 } 6717 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6718 Stream.IsDefined) { 6719 Error(Stream.Loc, "message operation does not support streams"); 6720 return false; 6721 } 6722 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6723 Error(Stream.Loc, "invalid message stream id"); 6724 return false; 6725 } 6726 return true; 6727 } 6728 6729 OperandMatchResultTy 6730 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6731 using namespace llvm::AMDGPU::SendMsg; 6732 6733 int64_t ImmVal = 0; 6734 SMLoc Loc = getLoc(); 6735 6736 if (trySkipId("sendmsg", AsmToken::LParen)) { 6737 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6738 OperandInfoTy Op(OP_NONE_); 6739 OperandInfoTy Stream(STREAM_ID_NONE_); 6740 if (parseSendMsgBody(Msg, Op, Stream) && 6741 validateSendMsg(Msg, Op, Stream)) { 6742 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6743 } else { 6744 return MatchOperand_ParseFail; 6745 } 6746 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6747 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6748 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6749 return MatchOperand_ParseFail; 6750 } 6751 } else { 6752 return MatchOperand_ParseFail; 6753 } 6754 6755 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6756 return MatchOperand_Success; 6757 } 6758 6759 bool AMDGPUOperand::isSendMsg() const { 6760 return isImmTy(ImmTySendMsg); 6761 } 6762 6763 //===----------------------------------------------------------------------===// 6764 // v_interp 6765 //===----------------------------------------------------------------------===// 6766 6767 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6768 StringRef Str; 6769 SMLoc S = getLoc(); 6770 6771 if (!parseId(Str)) 6772 return MatchOperand_NoMatch; 6773 6774 int Slot = StringSwitch<int>(Str) 6775 .Case("p10", 0) 6776 .Case("p20", 1) 6777 .Case("p0", 2) 6778 .Default(-1); 6779 6780 if (Slot == -1) { 6781 Error(S, "invalid interpolation slot"); 6782 return MatchOperand_ParseFail; 6783 } 6784 6785 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6786 AMDGPUOperand::ImmTyInterpSlot)); 6787 return MatchOperand_Success; 6788 } 6789 6790 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6791 StringRef Str; 6792 SMLoc S = getLoc(); 6793 6794 if (!parseId(Str)) 6795 return MatchOperand_NoMatch; 6796 6797 if (!Str.startswith("attr")) { 6798 Error(S, "invalid interpolation attribute"); 6799 return MatchOperand_ParseFail; 6800 } 6801 6802 StringRef Chan = Str.take_back(2); 6803 int AttrChan = StringSwitch<int>(Chan) 6804 .Case(".x", 0) 6805 .Case(".y", 1) 6806 .Case(".z", 2) 6807 .Case(".w", 3) 6808 .Default(-1); 6809 if (AttrChan == -1) { 6810 Error(S, "invalid or missing interpolation attribute channel"); 
6811 return MatchOperand_ParseFail; 6812 } 6813 6814 Str = Str.drop_back(2).drop_front(4); 6815 6816 uint8_t Attr; 6817 if (Str.getAsInteger(10, Attr)) { 6818 Error(S, "invalid or missing interpolation attribute number"); 6819 return MatchOperand_ParseFail; 6820 } 6821 6822 if (Attr > 63) { 6823 Error(S, "out of bounds interpolation attribute number"); 6824 return MatchOperand_ParseFail; 6825 } 6826 6827 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6828 6829 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6830 AMDGPUOperand::ImmTyInterpAttr)); 6831 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6832 AMDGPUOperand::ImmTyAttrChan)); 6833 return MatchOperand_Success; 6834 } 6835 6836 //===----------------------------------------------------------------------===// 6837 // exp 6838 //===----------------------------------------------------------------------===// 6839 6840 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6841 using namespace llvm::AMDGPU::Exp; 6842 6843 StringRef Str; 6844 SMLoc S = getLoc(); 6845 6846 if (!parseId(Str)) 6847 return MatchOperand_NoMatch; 6848 6849 unsigned Id = getTgtId(Str); 6850 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6851 Error(S, (Id == ET_INVALID) ? 6852 "invalid exp target" : 6853 "exp target is not supported on this GPU"); 6854 return MatchOperand_ParseFail; 6855 } 6856 6857 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6858 AMDGPUOperand::ImmTyExpTgt)); 6859 return MatchOperand_Success; 6860 } 6861 6862 //===----------------------------------------------------------------------===// 6863 // parser helpers 6864 //===----------------------------------------------------------------------===// 6865 6866 bool 6867 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6868 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6869 } 6870 6871 bool 6872 AMDGPUAsmParser::isId(const StringRef Id) const { 6873 return isId(getToken(), Id); 6874 } 6875 6876 bool 6877 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6878 return getTokenKind() == Kind; 6879 } 6880 6881 bool 6882 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6883 if (isId(Id)) { 6884 lex(); 6885 return true; 6886 } 6887 return false; 6888 } 6889 6890 bool 6891 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6892 if (isToken(AsmToken::Identifier)) { 6893 StringRef Tok = getTokenStr(); 6894 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6895 lex(); 6896 return true; 6897 } 6898 } 6899 return false; 6900 } 6901 6902 bool 6903 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6904 if (isId(Id) && peekToken().is(Kind)) { 6905 lex(); 6906 lex(); 6907 return true; 6908 } 6909 return false; 6910 } 6911 6912 bool 6913 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6914 if (isToken(Kind)) { 6915 lex(); 6916 return true; 6917 } 6918 return false; 6919 } 6920 6921 bool 6922 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6923 const StringRef ErrMsg) { 6924 if (!trySkipToken(Kind)) { 6925 Error(getLoc(), ErrMsg); 6926 return false; 6927 } 6928 return true; 6929 } 6930 6931 bool 6932 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6933 SMLoc S = getLoc(); 6934 6935 const MCExpr *Expr; 6936 if (Parser.parseExpression(Expr)) 6937 return false; 6938 6939 if (Expr->evaluateAsAbsolute(Imm)) 6940 return true; 6941 6942 if (Expected.empty()) { 6943 Error(S, "expected 
absolute expression"); 6944 } else { 6945 Error(S, Twine("expected ", Expected) + 6946 Twine(" or an absolute expression")); 6947 } 6948 return false; 6949 } 6950 6951 bool 6952 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6953 SMLoc S = getLoc(); 6954 6955 const MCExpr *Expr; 6956 if (Parser.parseExpression(Expr)) 6957 return false; 6958 6959 int64_t IntVal; 6960 if (Expr->evaluateAsAbsolute(IntVal)) { 6961 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6962 } else { 6963 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6964 } 6965 return true; 6966 } 6967 6968 bool 6969 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6970 if (isToken(AsmToken::String)) { 6971 Val = getToken().getStringContents(); 6972 lex(); 6973 return true; 6974 } else { 6975 Error(getLoc(), ErrMsg); 6976 return false; 6977 } 6978 } 6979 6980 bool 6981 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6982 if (isToken(AsmToken::Identifier)) { 6983 Val = getTokenStr(); 6984 lex(); 6985 return true; 6986 } else { 6987 if (!ErrMsg.empty()) 6988 Error(getLoc(), ErrMsg); 6989 return false; 6990 } 6991 } 6992 6993 AsmToken 6994 AMDGPUAsmParser::getToken() const { 6995 return Parser.getTok(); 6996 } 6997 6998 AsmToken 6999 AMDGPUAsmParser::peekToken() { 7000 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7001 } 7002 7003 void 7004 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7005 auto TokCount = getLexer().peekTokens(Tokens); 7006 7007 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7008 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7009 } 7010 7011 AsmToken::TokenKind 7012 AMDGPUAsmParser::getTokenKind() const { 7013 return getLexer().getKind(); 7014 } 7015 7016 SMLoc 7017 AMDGPUAsmParser::getLoc() const { 7018 return getToken().getLoc(); 7019 } 7020 7021 StringRef 7022 AMDGPUAsmParser::getTokenStr() const { 7023 return getToken().getString(); 7024 } 7025 7026 void 7027 AMDGPUAsmParser::lex() { 7028 Parser.Lex(); 7029 } 7030 7031 SMLoc 7032 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7033 const OperandVector &Operands) const { 7034 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7035 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7036 if (Test(Op)) 7037 return Op.getStartLoc(); 7038 } 7039 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7040 } 7041 7042 SMLoc 7043 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7044 const OperandVector &Operands) const { 7045 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7046 return getOperandLoc(Test, Operands); 7047 } 7048 7049 SMLoc 7050 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7051 const OperandVector &Operands) const { 7052 auto Test = [=](const AMDGPUOperand& Op) { 7053 return Op.isRegKind() && Op.getReg() == Reg; 7054 }; 7055 return getOperandLoc(Test, Operands); 7056 } 7057 7058 SMLoc 7059 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7060 auto Test = [](const AMDGPUOperand& Op) { 7061 return Op.IsImmKindLiteral() || Op.isExpr(); 7062 }; 7063 return getOperandLoc(Test, Operands); 7064 } 7065 7066 SMLoc 7067 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7068 auto Test = [](const AMDGPUOperand& Op) { 7069 return Op.isImmKindConst(); 7070 }; 7071 return getOperandLoc(Test, Operands); 7072 } 7073 7074 //===----------------------------------------------------------------------===// 7075 // swizzle 7076 
//===----------------------------------------------------------------------===// 7077 7078 LLVM_READNONE 7079 static unsigned 7080 encodeBitmaskPerm(const unsigned AndMask, 7081 const unsigned OrMask, 7082 const unsigned XorMask) { 7083 using namespace llvm::AMDGPU::Swizzle; 7084 7085 return BITMASK_PERM_ENC | 7086 (AndMask << BITMASK_AND_SHIFT) | 7087 (OrMask << BITMASK_OR_SHIFT) | 7088 (XorMask << BITMASK_XOR_SHIFT); 7089 } 7090 7091 bool 7092 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7093 const unsigned MinVal, 7094 const unsigned MaxVal, 7095 const StringRef ErrMsg, 7096 SMLoc &Loc) { 7097 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7098 return false; 7099 } 7100 Loc = getLoc(); 7101 if (!parseExpr(Op)) { 7102 return false; 7103 } 7104 if (Op < MinVal || Op > MaxVal) { 7105 Error(Loc, ErrMsg); 7106 return false; 7107 } 7108 7109 return true; 7110 } 7111 7112 bool 7113 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7114 const unsigned MinVal, 7115 const unsigned MaxVal, 7116 const StringRef ErrMsg) { 7117 SMLoc Loc; 7118 for (unsigned i = 0; i < OpNum; ++i) { 7119 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7120 return false; 7121 } 7122 7123 return true; 7124 } 7125 7126 bool 7127 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7128 using namespace llvm::AMDGPU::Swizzle; 7129 7130 int64_t Lane[LANE_NUM]; 7131 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7132 "expected a 2-bit lane id")) { 7133 Imm = QUAD_PERM_ENC; 7134 for (unsigned I = 0; I < LANE_NUM; ++I) { 7135 Imm |= Lane[I] << (LANE_SHIFT * I); 7136 } 7137 return true; 7138 } 7139 return false; 7140 } 7141 7142 bool 7143 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7144 using namespace llvm::AMDGPU::Swizzle; 7145 7146 SMLoc Loc; 7147 int64_t GroupSize; 7148 int64_t LaneIdx; 7149 7150 if (!parseSwizzleOperand(GroupSize, 7151 2, 32, 7152 "group size must be in the interval [2,32]", 7153 Loc)) { 7154 return false; 7155 } 7156 if (!isPowerOf2_64(GroupSize)) { 7157 Error(Loc, "group size must be a power of two"); 7158 return false; 7159 } 7160 if (parseSwizzleOperand(LaneIdx, 7161 0, GroupSize - 1, 7162 "lane id must be in the interval [0,group size - 1]", 7163 Loc)) { 7164 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7165 return true; 7166 } 7167 return false; 7168 } 7169 7170 bool 7171 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7172 using namespace llvm::AMDGPU::Swizzle; 7173 7174 SMLoc Loc; 7175 int64_t GroupSize; 7176 7177 if (!parseSwizzleOperand(GroupSize, 7178 2, 32, 7179 "group size must be in the interval [2,32]", 7180 Loc)) { 7181 return false; 7182 } 7183 if (!isPowerOf2_64(GroupSize)) { 7184 Error(Loc, "group size must be a power of two"); 7185 return false; 7186 } 7187 7188 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7189 return true; 7190 } 7191 7192 bool 7193 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7194 using namespace llvm::AMDGPU::Swizzle; 7195 7196 SMLoc Loc; 7197 int64_t GroupSize; 7198 7199 if (!parseSwizzleOperand(GroupSize, 7200 1, 16, 7201 "group size must be in the interval [1,16]", 7202 Loc)) { 7203 return false; 7204 } 7205 if (!isPowerOf2_64(GroupSize)) { 7206 Error(Loc, "group size must be a power of two"); 7207 return false; 7208 } 7209 7210 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7211 return true; 7212 } 7213 7214 bool 7215 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7216 using namespace llvm::AMDGPU::Swizzle; 7217 7218 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7219 return false; 7220 } 7221 7222 StringRef Ctl; 7223 SMLoc StrLoc = getLoc(); 7224 if (!parseString(Ctl)) { 7225 return false; 7226 } 7227 if (Ctl.size() != BITMASK_WIDTH) { 7228 Error(StrLoc, "expected a 5-character mask"); 7229 return false; 7230 } 7231 7232 unsigned AndMask = 0; 7233 unsigned OrMask = 0; 7234 unsigned XorMask = 0; 7235 7236 for (size_t i = 0; i < Ctl.size(); ++i) { 7237 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7238 switch(Ctl[i]) { 7239 default: 7240 Error(StrLoc, "invalid mask"); 7241 return false; 7242 case '0': 7243 break; 7244 case '1': 7245 OrMask |= Mask; 7246 break; 7247 case 'p': 7248 AndMask |= Mask; 7249 break; 7250 case 'i': 7251 AndMask |= Mask; 7252 XorMask |= Mask; 7253 break; 7254 } 7255 } 7256 7257 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7258 return true; 7259 } 7260 7261 bool 7262 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7263 7264 SMLoc OffsetLoc = getLoc(); 7265 7266 if (!parseExpr(Imm, "a swizzle macro")) { 7267 return false; 7268 } 7269 if (!isUInt<16>(Imm)) { 7270 Error(OffsetLoc, "expected a 16-bit offset"); 7271 return false; 7272 } 7273 return true; 7274 } 7275 7276 bool 7277 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7278 using namespace llvm::AMDGPU::Swizzle; 7279 7280 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7281 7282 SMLoc ModeLoc = getLoc(); 7283 bool Ok = false; 7284 7285 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7286 Ok = parseSwizzleQuadPerm(Imm); 7287 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7288 Ok = parseSwizzleBitmaskPerm(Imm); 7289 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7290 Ok = parseSwizzleBroadcast(Imm); 7291 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7292 Ok = parseSwizzleSwap(Imm); 7293 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7294 Ok = parseSwizzleReverse(Imm); 7295 } else { 7296 Error(ModeLoc, "expected a swizzle mode"); 7297 } 7298 7299 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7300 } 7301 7302 return false; 7303 } 7304 7305 OperandMatchResultTy 7306 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7307 SMLoc S = getLoc(); 7308 int64_t Imm = 0; 7309 7310 if (trySkipId("offset")) { 7311 7312 bool Ok = false; 7313 if (skipToken(AsmToken::Colon, "expected a colon")) { 7314 if (trySkipId("swizzle")) { 7315 Ok = parseSwizzleMacro(Imm); 7316 } else { 7317 Ok = parseSwizzleOffset(Imm); 7318 } 7319 } 7320 7321 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7322 7323 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7324 } else { 7325 // Swizzle "offset" operand is optional. 7326 // If it is omitted, try parsing other optional operands. 
7327 return parseOptionalOpr(Operands); 7328 } 7329 } 7330 7331 bool 7332 AMDGPUOperand::isSwizzle() const { 7333 return isImmTy(ImmTySwizzle); 7334 } 7335 7336 //===----------------------------------------------------------------------===// 7337 // VGPR Index Mode 7338 //===----------------------------------------------------------------------===// 7339 7340 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7341 7342 using namespace llvm::AMDGPU::VGPRIndexMode; 7343 7344 if (trySkipToken(AsmToken::RParen)) { 7345 return OFF; 7346 } 7347 7348 int64_t Imm = 0; 7349 7350 while (true) { 7351 unsigned Mode = 0; 7352 SMLoc S = getLoc(); 7353 7354 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7355 if (trySkipId(IdSymbolic[ModeId])) { 7356 Mode = 1 << ModeId; 7357 break; 7358 } 7359 } 7360 7361 if (Mode == 0) { 7362 Error(S, (Imm == 0)? 7363 "expected a VGPR index mode or a closing parenthesis" : 7364 "expected a VGPR index mode"); 7365 return UNDEF; 7366 } 7367 7368 if (Imm & Mode) { 7369 Error(S, "duplicate VGPR index mode"); 7370 return UNDEF; 7371 } 7372 Imm |= Mode; 7373 7374 if (trySkipToken(AsmToken::RParen)) 7375 break; 7376 if (!skipToken(AsmToken::Comma, 7377 "expected a comma or a closing parenthesis")) 7378 return UNDEF; 7379 } 7380 7381 return Imm; 7382 } 7383 7384 OperandMatchResultTy 7385 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7386 7387 using namespace llvm::AMDGPU::VGPRIndexMode; 7388 7389 int64_t Imm = 0; 7390 SMLoc S = getLoc(); 7391 7392 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7393 Imm = parseGPRIdxMacro(); 7394 if (Imm == UNDEF) 7395 return MatchOperand_ParseFail; 7396 } else { 7397 if (getParser().parseAbsoluteExpression(Imm)) 7398 return MatchOperand_ParseFail; 7399 if (Imm < 0 || !isUInt<4>(Imm)) { 7400 Error(S, "invalid immediate: only 4-bit values are legal"); 7401 return MatchOperand_ParseFail; 7402 } 7403 } 7404 7405 Operands.push_back( 7406 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7407 return MatchOperand_Success; 7408 } 7409 7410 bool AMDGPUOperand::isGPRIdxMode() const { 7411 return isImmTy(ImmTyGprIdxMode); 7412 } 7413 7414 //===----------------------------------------------------------------------===// 7415 // sopp branch targets 7416 //===----------------------------------------------------------------------===// 7417 7418 OperandMatchResultTy 7419 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7420 7421 // Make sure we are not parsing something 7422 // that looks like a label or an expression but is not. 7423 // This will improve error messages. 7424 if (isRegister() || isModifier()) 7425 return MatchOperand_NoMatch; 7426 7427 if (!parseExpr(Operands)) 7428 return MatchOperand_ParseFail; 7429 7430 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7431 assert(Opr.isImm() || Opr.isExpr()); 7432 SMLoc Loc = Opr.getStartLoc(); 7433 7434 // Currently we do not support arbitrary expressions as branch targets. 7435 // Only labels and absolute expressions are accepted. 
7436 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7437 Error(Loc, "expected an absolute expression or a label"); 7438 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7439 Error(Loc, "expected a 16-bit signed jump offset"); 7440 } 7441 7442 return MatchOperand_Success; 7443 } 7444 7445 //===----------------------------------------------------------------------===// 7446 // Boolean holding registers 7447 //===----------------------------------------------------------------------===// 7448 7449 OperandMatchResultTy 7450 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7451 return parseReg(Operands); 7452 } 7453 7454 //===----------------------------------------------------------------------===// 7455 // mubuf 7456 //===----------------------------------------------------------------------===// 7457 7458 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7459 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7460 } 7461 7462 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7463 const OperandVector &Operands, 7464 bool IsAtomic, 7465 bool IsLds) { 7466 OptionalImmIndexMap OptionalIdx; 7467 unsigned FirstOperandIdx = 1; 7468 bool IsAtomicReturn = false; 7469 7470 if (IsAtomic) { 7471 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7472 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7473 if (!Op.isCPol()) 7474 continue; 7475 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7476 break; 7477 } 7478 7479 if (!IsAtomicReturn) { 7480 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7481 if (NewOpc != -1) 7482 Inst.setOpcode(NewOpc); 7483 } 7484 7485 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7486 SIInstrFlags::IsAtomicRet; 7487 } 7488 7489 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7490 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7491 7492 // Add the register arguments 7493 if (Op.isReg()) { 7494 Op.addRegOperands(Inst, 1); 7495 // Insert a tied src for atomic return dst. 7496 // This cannot be postponed as subsequent calls to 7497 // addImmOperands rely on correct number of MC operands. 7498 if (IsAtomicReturn && i == FirstOperandIdx) 7499 Op.addRegOperands(Inst, 1); 7500 continue; 7501 } 7502 7503 // Handle the case where soffset is an immediate 7504 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7505 Op.addImmOperands(Inst, 1); 7506 continue; 7507 } 7508 7509 // Handle tokens like 'offen' which are sometimes hard-coded into the 7510 // asm string. There are no MCInst operands for these. 
7511 if (Op.isToken()) { 7512 continue; 7513 } 7514 assert(Op.isImm()); 7515 7516 // Handle optional arguments 7517 OptionalIdx[Op.getImmTy()] = i; 7518 } 7519 7520 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7522 7523 if (!IsLds) { // tfe is not legal with lds opcodes 7524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7525 } 7526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7527 } 7528 7529 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7530 OptionalImmIndexMap OptionalIdx; 7531 7532 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7533 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7534 7535 // Add the register arguments 7536 if (Op.isReg()) { 7537 Op.addRegOperands(Inst, 1); 7538 continue; 7539 } 7540 7541 // Handle the case where soffset is an immediate 7542 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7543 Op.addImmOperands(Inst, 1); 7544 continue; 7545 } 7546 7547 // Handle tokens like 'offen' which are sometimes hard-coded into the 7548 // asm string. There are no MCInst operands for these. 7549 if (Op.isToken()) { 7550 continue; 7551 } 7552 assert(Op.isImm()); 7553 7554 // Handle optional arguments 7555 OptionalIdx[Op.getImmTy()] = i; 7556 } 7557 7558 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7559 AMDGPUOperand::ImmTyOffset); 7560 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7561 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7562 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7563 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7564 } 7565 7566 //===----------------------------------------------------------------------===// 7567 // mimg 7568 //===----------------------------------------------------------------------===// 7569 7570 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7571 bool IsAtomic) { 7572 unsigned I = 1; 7573 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7574 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7575 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7576 } 7577 7578 if (IsAtomic) { 7579 // Add src, same as dst 7580 assert(Desc.getNumDefs() == 1); 7581 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7582 } 7583 7584 OptionalImmIndexMap OptionalIdx; 7585 7586 for (unsigned E = Operands.size(); I != E; ++I) { 7587 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7588 7589 // Add the register arguments 7590 if (Op.isReg()) { 7591 Op.addRegOperands(Inst, 1); 7592 } else if (Op.isImmModifier()) { 7593 OptionalIdx[Op.getImmTy()] = I; 7594 } else if (!Op.isToken()) { 7595 llvm_unreachable("unexpected operand type"); 7596 } 7597 } 7598 7599 bool IsGFX10Plus = isGFX10Plus(); 7600 7601 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7602 if (IsGFX10Plus) 7603 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7604 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7605 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7606 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7607 if (IsGFX10Plus) 7608 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7609 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7610 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7611 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7612 if (!IsGFX10Plus) 7613 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7614 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7615 } 7616 7617 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7618 cvtMIMG(Inst, Operands, true); 7619 } 7620 7621 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7622 OptionalImmIndexMap OptionalIdx; 7623 bool IsAtomicReturn = false; 7624 7625 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7626 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7627 if (!Op.isCPol()) 7628 continue; 7629 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7630 break; 7631 } 7632 7633 if (!IsAtomicReturn) { 7634 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7635 if (NewOpc != -1) 7636 Inst.setOpcode(NewOpc); 7637 } 7638 7639 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7640 SIInstrFlags::IsAtomicRet; 7641 7642 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7643 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7644 7645 // Add the register arguments 7646 if (Op.isReg()) { 7647 Op.addRegOperands(Inst, 1); 7648 if (IsAtomicReturn && i == 1) 7649 Op.addRegOperands(Inst, 1); 7650 continue; 7651 } 7652 7653 // Handle the case where soffset is an immediate 7654 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7655 Op.addImmOperands(Inst, 1); 7656 continue; 7657 } 7658 7659 // Handle tokens like 'offen' which are sometimes hard-coded into the 7660 // asm string. There are no MCInst operands for these. 7661 if (Op.isToken()) { 7662 continue; 7663 } 7664 assert(Op.isImm()); 7665 7666 // Handle optional arguments 7667 OptionalIdx[Op.getImmTy()] = i; 7668 } 7669 7670 if ((int)Inst.getNumOperands() <= 7671 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7674 } 7675 7676 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7677 const OperandVector &Operands) { 7678 for (unsigned I = 1; I < Operands.size(); ++I) { 7679 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7680 if (Operand.isReg()) 7681 Operand.addRegOperands(Inst, 1); 7682 } 7683 7684 Inst.addOperand(MCOperand::createImm(1)); // a16 7685 } 7686 7687 //===----------------------------------------------------------------------===// 7688 // smrd 7689 //===----------------------------------------------------------------------===// 7690 7691 bool AMDGPUOperand::isSMRDOffset8() const { 7692 return isImm() && isUInt<8>(getImm()); 7693 } 7694 7695 bool AMDGPUOperand::isSMEMOffset() const { 7696 return isImm(); // Offset range is checked later by validator. 7697 } 7698 7699 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7700 // 32-bit literals are only supported on CI and we only want to use them 7701 // when the offset is > 8-bits. 
7702 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7703 } 7704 7705 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7706 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7707 } 7708 7709 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7710 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7711 } 7712 7713 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7714 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7715 } 7716 7717 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7718 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7719 } 7720 7721 //===----------------------------------------------------------------------===// 7722 // vop3 7723 //===----------------------------------------------------------------------===// 7724 7725 static bool ConvertOmodMul(int64_t &Mul) { 7726 if (Mul != 1 && Mul != 2 && Mul != 4) 7727 return false; 7728 7729 Mul >>= 1; 7730 return true; 7731 } 7732 7733 static bool ConvertOmodDiv(int64_t &Div) { 7734 if (Div == 1) { 7735 Div = 0; 7736 return true; 7737 } 7738 7739 if (Div == 2) { 7740 Div = 3; 7741 return true; 7742 } 7743 7744 return false; 7745 } 7746 7747 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7748 // This is intentional and ensures compatibility with sp3. 7749 // See bug 35397 for details. 7750 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7751 if (BoundCtrl == 0 || BoundCtrl == 1) { 7752 BoundCtrl = 1; 7753 return true; 7754 } 7755 return false; 7756 } 7757 7758 // Note: the order in this table matches the order of operands in AsmString. 7759 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7760 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7761 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7762 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7763 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7764 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7765 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7766 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7767 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7768 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7769 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7770 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7771 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7772 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7773 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7774 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7775 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7776 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7777 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7778 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7779 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7780 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7781 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7782 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7783 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7784 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7785 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7786 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7787 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7788 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7789 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7790 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7791 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7792 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7793 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7794 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7795 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7796 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7797 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7798 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7799 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7800 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7801 }; 7802 7803 void AMDGPUAsmParser::onBeginOfFile() { 7804 if (!getParser().getStreamer().getTargetStreamer() || 7805 getSTI().getTargetTriple().getArch() == Triple::r600) 7806 return; 7807 7808 if (!getTargetStreamer().getTargetID()) 7809 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7810 7811 if (isHsaAbiVersion3AndAbove(&getSTI())) 7812 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7813 } 7814 7815 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7816 7817 OperandMatchResultTy res = parseOptionalOpr(Operands); 7818 7819 // This is a hack to enable hardcoded mandatory operands which follow 7820 // optional operands. 7821 // 7822 // Current design assumes that all operands after the first optional operand 7823 // are also optional. However implementation of some instructions violates 7824 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7825 // 7826 // To alleviate this problem, we have to (implicitly) parse extra operands 7827 // to make sure autogenerated parser of custom operands never hit hardcoded 7828 // mandatory operands. 
7829 7830 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7831 if (res != MatchOperand_Success || 7832 isToken(AsmToken::EndOfStatement)) 7833 break; 7834 7835 trySkipToken(AsmToken::Comma); 7836 res = parseOptionalOpr(Operands); 7837 } 7838 7839 return res; 7840 } 7841 7842 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7843 OperandMatchResultTy res; 7844 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7845 // try to parse any optional operand here 7846 if (Op.IsBit) { 7847 res = parseNamedBit(Op.Name, Operands, Op.Type); 7848 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7849 res = parseOModOperand(Operands); 7850 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7851 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7852 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7853 res = parseSDWASel(Operands, Op.Name, Op.Type); 7854 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7855 res = parseSDWADstUnused(Operands); 7856 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7857 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7858 Op.Type == AMDGPUOperand::ImmTyNegLo || 7859 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7860 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7861 Op.ConvertResult); 7862 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7863 res = parseDim(Operands); 7864 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7865 res = parseCPol(Operands); 7866 } else { 7867 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7868 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7869 res = parseOperandArrayWithPrefix("neg", Operands, 7870 AMDGPUOperand::ImmTyBLGP, 7871 nullptr); 7872 } 7873 } 7874 if (res != MatchOperand_NoMatch) { 7875 return res; 7876 } 7877 } 7878 return MatchOperand_NoMatch; 7879 } 7880 7881 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7882 StringRef Name = getTokenStr(); 7883 if (Name == "mul") { 7884 return parseIntWithPrefix("mul", Operands, 7885 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7886 } 7887 7888 if (Name == "div") { 7889 return parseIntWithPrefix("div", Operands, 7890 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7891 } 7892 7893 return MatchOperand_NoMatch; 7894 } 7895 7896 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7897 cvtVOP3P(Inst, Operands); 7898 7899 int Opc = Inst.getOpcode(); 7900 7901 int SrcNum; 7902 const int Ops[] = { AMDGPU::OpName::src0, 7903 AMDGPU::OpName::src1, 7904 AMDGPU::OpName::src2 }; 7905 for (SrcNum = 0; 7906 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7907 ++SrcNum); 7908 assert(SrcNum > 0); 7909 7910 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7911 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7912 7913 if ((OpSel & (1 << SrcNum)) != 0) { 7914 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7915 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7916 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7917 } 7918 } 7919 7920 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7921 // 1. This operand is input modifiers 7922 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7923 // 2. This is not last operand 7924 && Desc.NumOperands > (OpNum + 1) 7925 // 3. Next operand is register class 7926 && Desc.OpInfo[OpNum + 1].RegClass != -1 7927 // 4. 
Next register is not tied to any other operand 7928 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7929 } 7930 7931 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7932 { 7933 OptionalImmIndexMap OptionalIdx; 7934 unsigned Opc = Inst.getOpcode(); 7935 7936 unsigned I = 1; 7937 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7938 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7939 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7940 } 7941 7942 for (unsigned E = Operands.size(); I != E; ++I) { 7943 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7944 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7945 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7946 } else if (Op.isInterpSlot() || 7947 Op.isInterpAttr() || 7948 Op.isAttrChan()) { 7949 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7950 } else if (Op.isImmModifier()) { 7951 OptionalIdx[Op.getImmTy()] = I; 7952 } else { 7953 llvm_unreachable("unhandled operand type"); 7954 } 7955 } 7956 7957 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7958 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7959 } 7960 7961 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7962 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7963 } 7964 7965 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7967 } 7968 } 7969 7970 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7971 OptionalImmIndexMap &OptionalIdx) { 7972 unsigned Opc = Inst.getOpcode(); 7973 7974 unsigned I = 1; 7975 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7976 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7977 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7978 } 7979 7980 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7981 // This instruction has src modifiers 7982 for (unsigned E = Operands.size(); I != E; ++I) { 7983 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7984 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7985 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7986 } else if (Op.isImmModifier()) { 7987 OptionalIdx[Op.getImmTy()] = I; 7988 } else if (Op.isRegOrImm()) { 7989 Op.addRegOrImmOperands(Inst, 1); 7990 } else { 7991 llvm_unreachable("unhandled operand type"); 7992 } 7993 } 7994 } else { 7995 // No src modifiers 7996 for (unsigned E = Operands.size(); I != E; ++I) { 7997 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7998 if (Op.isMod()) { 7999 OptionalIdx[Op.getImmTy()] = I; 8000 } else { 8001 Op.addRegOrImmOperands(Inst, 1); 8002 } 8003 } 8004 } 8005 8006 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8008 } 8009 8010 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8012 } 8013 8014 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8015 // it has src2 register operand that is tied to dst operand 8016 // we don't allow modifiers for this operand in assembler so src2_modifiers 8017 // should be 0. 
8018 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8019 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8020 Opc == AMDGPU::V_MAC_F32_e64_vi || 8021 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8022 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8023 Opc == AMDGPU::V_MAC_F16_e64_vi || 8024 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8025 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8026 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8027 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8028 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8029 auto it = Inst.begin(); 8030 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8031 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8032 ++it; 8033 // Copy the operand to ensure it's not invalidated when Inst grows. 8034 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8035 } 8036 } 8037 8038 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8039 OptionalImmIndexMap OptionalIdx; 8040 cvtVOP3(Inst, Operands, OptionalIdx); 8041 } 8042 8043 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8044 OptionalImmIndexMap &OptIdx) { 8045 const int Opc = Inst.getOpcode(); 8046 const MCInstrDesc &Desc = MII.get(Opc); 8047 8048 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8049 8050 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8051 assert(!IsPacked); 8052 Inst.addOperand(Inst.getOperand(0)); 8053 } 8054 8055 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8056 // instruction, and then figure out where to actually put the modifiers 8057 8058 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8059 if (OpSelIdx != -1) { 8060 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8061 } 8062 8063 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8064 if (OpSelHiIdx != -1) { 8065 int DefaultVal = IsPacked ? 
-1 : 0; 8066 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8067 DefaultVal); 8068 } 8069 8070 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8071 if (NegLoIdx != -1) { 8072 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8073 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8074 } 8075 8076 const int Ops[] = { AMDGPU::OpName::src0, 8077 AMDGPU::OpName::src1, 8078 AMDGPU::OpName::src2 }; 8079 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8080 AMDGPU::OpName::src1_modifiers, 8081 AMDGPU::OpName::src2_modifiers }; 8082 8083 unsigned OpSel = 0; 8084 unsigned OpSelHi = 0; 8085 unsigned NegLo = 0; 8086 unsigned NegHi = 0; 8087 8088 if (OpSelIdx != -1) 8089 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8090 8091 if (OpSelHiIdx != -1) 8092 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8093 8094 if (NegLoIdx != -1) { 8095 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8096 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8097 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8098 } 8099 8100 for (int J = 0; J < 3; ++J) { 8101 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8102 if (OpIdx == -1) 8103 break; 8104 8105 uint32_t ModVal = 0; 8106 8107 if ((OpSel & (1 << J)) != 0) 8108 ModVal |= SISrcMods::OP_SEL_0; 8109 8110 if ((OpSelHi & (1 << J)) != 0) 8111 ModVal |= SISrcMods::OP_SEL_1; 8112 8113 if ((NegLo & (1 << J)) != 0) 8114 ModVal |= SISrcMods::NEG; 8115 8116 if ((NegHi & (1 << J)) != 0) 8117 ModVal |= SISrcMods::NEG_HI; 8118 8119 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8120 8121 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8122 } 8123 } 8124 8125 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8126 OptionalImmIndexMap OptIdx; 8127 cvtVOP3(Inst, Operands, OptIdx); 8128 cvtVOP3P(Inst, Operands, OptIdx); 8129 } 8130 8131 //===----------------------------------------------------------------------===// 8132 // dpp 8133 //===----------------------------------------------------------------------===// 8134 8135 bool AMDGPUOperand::isDPP8() const { 8136 return isImmTy(ImmTyDPP8); 8137 } 8138 8139 bool AMDGPUOperand::isDPPCtrl() const { 8140 using namespace AMDGPU::DPP; 8141 8142 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8143 if (result) { 8144 int64_t Imm = getImm(); 8145 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8146 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8147 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8148 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8149 (Imm == DppCtrl::WAVE_SHL1) || 8150 (Imm == DppCtrl::WAVE_ROL1) || 8151 (Imm == DppCtrl::WAVE_SHR1) || 8152 (Imm == DppCtrl::WAVE_ROR1) || 8153 (Imm == DppCtrl::ROW_MIRROR) || 8154 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8155 (Imm == DppCtrl::BCAST15) || 8156 (Imm == DppCtrl::BCAST31) || 8157 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8158 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8159 } 8160 return false; 8161 } 8162 8163 //===----------------------------------------------------------------------===// 8164 // mAI 8165 //===----------------------------------------------------------------------===// 8166 8167 bool AMDGPUOperand::isBLGP() const { 8168 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8169 } 8170 8171 bool 
AMDGPUOperand::isCBSZ() const { 8172 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8173 } 8174 8175 bool AMDGPUOperand::isABID() const { 8176 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8177 } 8178 8179 bool AMDGPUOperand::isS16Imm() const { 8180 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8181 } 8182 8183 bool AMDGPUOperand::isU16Imm() const { 8184 return isImm() && isUInt<16>(getImm()); 8185 } 8186 8187 //===----------------------------------------------------------------------===// 8188 // dim 8189 //===----------------------------------------------------------------------===// 8190 8191 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8192 // We want to allow "dim:1D" etc., 8193 // but the initial 1 is tokenized as an integer. 8194 std::string Token; 8195 if (isToken(AsmToken::Integer)) { 8196 SMLoc Loc = getToken().getEndLoc(); 8197 Token = std::string(getTokenStr()); 8198 lex(); 8199 if (getLoc() != Loc) 8200 return false; 8201 } 8202 8203 StringRef Suffix; 8204 if (!parseId(Suffix)) 8205 return false; 8206 Token += Suffix; 8207 8208 StringRef DimId = Token; 8209 if (DimId.startswith("SQ_RSRC_IMG_")) 8210 DimId = DimId.drop_front(12); 8211 8212 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8213 if (!DimInfo) 8214 return false; 8215 8216 Encoding = DimInfo->Encoding; 8217 return true; 8218 } 8219 8220 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8221 if (!isGFX10Plus()) 8222 return MatchOperand_NoMatch; 8223 8224 SMLoc S = getLoc(); 8225 8226 if (!trySkipId("dim", AsmToken::Colon)) 8227 return MatchOperand_NoMatch; 8228 8229 unsigned Encoding; 8230 SMLoc Loc = getLoc(); 8231 if (!parseDimId(Encoding)) { 8232 Error(Loc, "invalid dim value"); 8233 return MatchOperand_ParseFail; 8234 } 8235 8236 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8237 AMDGPUOperand::ImmTyDim)); 8238 return MatchOperand_Success; 8239 } 8240 8241 //===----------------------------------------------------------------------===// 8242 // dpp 8243 //===----------------------------------------------------------------------===// 8244 8245 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8246 SMLoc S = getLoc(); 8247 8248 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8249 return MatchOperand_NoMatch; 8250 8251 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8252 8253 int64_t Sels[8]; 8254 8255 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8256 return MatchOperand_ParseFail; 8257 8258 for (size_t i = 0; i < 8; ++i) { 8259 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8260 return MatchOperand_ParseFail; 8261 8262 SMLoc Loc = getLoc(); 8263 if (getParser().parseAbsoluteExpression(Sels[i])) 8264 return MatchOperand_ParseFail; 8265 if (0 > Sels[i] || 7 < Sels[i]) { 8266 Error(Loc, "expected a 3-bit value"); 8267 return MatchOperand_ParseFail; 8268 } 8269 } 8270 8271 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8272 return MatchOperand_ParseFail; 8273 8274 unsigned DPP8 = 0; 8275 for (size_t i = 0; i < 8; ++i) 8276 DPP8 |= (Sels[i] << (i * 3)); 8277 8278 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8279 return MatchOperand_Success; 8280 } 8281 8282 bool 8283 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8284 const OperandVector &Operands) { 8285 if (Ctrl == "row_newbcast") 8286 return isGFX90A(); 8287 8288 if (Ctrl == "row_share" || 8289 Ctrl 
== "row_xmask") 8290 return isGFX10Plus(); 8291 8292 if (Ctrl == "wave_shl" || 8293 Ctrl == "wave_shr" || 8294 Ctrl == "wave_rol" || 8295 Ctrl == "wave_ror" || 8296 Ctrl == "row_bcast") 8297 return isVI() || isGFX9(); 8298 8299 return Ctrl == "row_mirror" || 8300 Ctrl == "row_half_mirror" || 8301 Ctrl == "quad_perm" || 8302 Ctrl == "row_shl" || 8303 Ctrl == "row_shr" || 8304 Ctrl == "row_ror"; 8305 } 8306 8307 int64_t 8308 AMDGPUAsmParser::parseDPPCtrlPerm() { 8309 // quad_perm:[%d,%d,%d,%d] 8310 8311 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8312 return -1; 8313 8314 int64_t Val = 0; 8315 for (int i = 0; i < 4; ++i) { 8316 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8317 return -1; 8318 8319 int64_t Temp; 8320 SMLoc Loc = getLoc(); 8321 if (getParser().parseAbsoluteExpression(Temp)) 8322 return -1; 8323 if (Temp < 0 || Temp > 3) { 8324 Error(Loc, "expected a 2-bit value"); 8325 return -1; 8326 } 8327 8328 Val += (Temp << i * 2); 8329 } 8330 8331 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8332 return -1; 8333 8334 return Val; 8335 } 8336 8337 int64_t 8338 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8339 using namespace AMDGPU::DPP; 8340 8341 // sel:%d 8342 8343 int64_t Val; 8344 SMLoc Loc = getLoc(); 8345 8346 if (getParser().parseAbsoluteExpression(Val)) 8347 return -1; 8348 8349 struct DppCtrlCheck { 8350 int64_t Ctrl; 8351 int Lo; 8352 int Hi; 8353 }; 8354 8355 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8356 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8357 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8358 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8359 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8360 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8361 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8362 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8363 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8364 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8365 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8366 .Default({-1, 0, 0}); 8367 8368 bool Valid; 8369 if (Check.Ctrl == -1) { 8370 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8371 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8372 } else { 8373 Valid = Check.Lo <= Val && Val <= Check.Hi; 8374 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8375 } 8376 8377 if (!Valid) { 8378 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8379 return -1; 8380 } 8381 8382 return Val; 8383 } 8384 8385 OperandMatchResultTy 8386 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8387 using namespace AMDGPU::DPP; 8388 8389 if (!isToken(AsmToken::Identifier) || 8390 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8391 return MatchOperand_NoMatch; 8392 8393 SMLoc S = getLoc(); 8394 int64_t Val = -1; 8395 StringRef Ctrl; 8396 8397 parseId(Ctrl); 8398 8399 if (Ctrl == "row_mirror") { 8400 Val = DppCtrl::ROW_MIRROR; 8401 } else if (Ctrl == "row_half_mirror") { 8402 Val = DppCtrl::ROW_HALF_MIRROR; 8403 } else { 8404 if (skipToken(AsmToken::Colon, "expected a colon")) { 8405 if (Ctrl == "quad_perm") { 8406 Val = parseDPPCtrlPerm(); 8407 } else { 8408 Val = parseDPPCtrlSel(Ctrl); 8409 } 8410 } 8411 } 8412 8413 if (Val == -1) 8414 return MatchOperand_ParseFail; 8415 8416 Operands.push_back( 8417 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8418 return MatchOperand_Success; 8419 } 8420 8421 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8422 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8423 } 8424 8425 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8426 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8427 } 8428 8429 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8430 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8431 } 8432 8433 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8434 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8435 } 8436 8437 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8438 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8439 } 8440 8441 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8442 OptionalImmIndexMap OptionalIdx; 8443 8444 unsigned Opc = Inst.getOpcode(); 8445 bool HasModifiers = 8446 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8447 unsigned I = 1; 8448 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8449 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8450 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8451 } 8452 8453 int Fi = 0; 8454 for (unsigned E = Operands.size(); I != E; ++I) { 8455 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8456 MCOI::TIED_TO); 8457 if (TiedTo != -1) { 8458 assert((unsigned)TiedTo < Inst.getNumOperands()); 8459 // handle tied old or src2 for MAC instructions 8460 Inst.addOperand(Inst.getOperand(TiedTo)); 8461 } 8462 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8463 // Add the register arguments 8464 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8465 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8466 // Skip it. 
8467 continue; 8468 } 8469 8470 if (IsDPP8) { 8471 if (Op.isDPP8()) { 8472 Op.addImmOperands(Inst, 1); 8473 } else if (HasModifiers && 8474 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8475 Op.addRegWithFPInputModsOperands(Inst, 2); 8476 } else if (Op.isFI()) { 8477 Fi = Op.getImm(); 8478 } else if (Op.isReg()) { 8479 Op.addRegOperands(Inst, 1); 8480 } else { 8481 llvm_unreachable("Invalid operand type"); 8482 } 8483 } else { 8484 if (HasModifiers && 8485 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8486 Op.addRegWithFPInputModsOperands(Inst, 2); 8487 } else if (Op.isReg()) { 8488 Op.addRegOperands(Inst, 1); 8489 } else if (Op.isDPPCtrl()) { 8490 Op.addImmOperands(Inst, 1); 8491 } else if (Op.isImm()) { 8492 // Handle optional arguments 8493 OptionalIdx[Op.getImmTy()] = I; 8494 } else { 8495 llvm_unreachable("Invalid operand type"); 8496 } 8497 } 8498 } 8499 8500 if (IsDPP8) { 8501 using namespace llvm::AMDGPU::DPP; 8502 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8503 } else { 8504 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8507 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8509 } 8510 } 8511 } 8512 8513 //===----------------------------------------------------------------------===// 8514 // sdwa 8515 //===----------------------------------------------------------------------===// 8516 8517 OperandMatchResultTy 8518 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8519 AMDGPUOperand::ImmTy Type) { 8520 using namespace llvm::AMDGPU::SDWA; 8521 8522 SMLoc S = getLoc(); 8523 StringRef Value; 8524 OperandMatchResultTy res; 8525 8526 SMLoc StringLoc; 8527 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8528 if (res != MatchOperand_Success) { 8529 return res; 8530 } 8531 8532 int64_t Int; 8533 Int = StringSwitch<int64_t>(Value) 8534 .Case("BYTE_0", SdwaSel::BYTE_0) 8535 .Case("BYTE_1", SdwaSel::BYTE_1) 8536 .Case("BYTE_2", SdwaSel::BYTE_2) 8537 .Case("BYTE_3", SdwaSel::BYTE_3) 8538 .Case("WORD_0", SdwaSel::WORD_0) 8539 .Case("WORD_1", SdwaSel::WORD_1) 8540 .Case("DWORD", SdwaSel::DWORD) 8541 .Default(0xffffffff); 8542 8543 if (Int == 0xffffffff) { 8544 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8545 return MatchOperand_ParseFail; 8546 } 8547 8548 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8549 return MatchOperand_Success; 8550 } 8551 8552 OperandMatchResultTy 8553 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8554 using namespace llvm::AMDGPU::SDWA; 8555 8556 SMLoc S = getLoc(); 8557 StringRef Value; 8558 OperandMatchResultTy res; 8559 8560 SMLoc StringLoc; 8561 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8562 if (res != MatchOperand_Success) { 8563 return res; 8564 } 8565 8566 int64_t Int; 8567 Int = StringSwitch<int64_t>(Value) 8568 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8569 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8570 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8571 .Default(0xffffffff); 8572 8573 if (Int == 0xffffffff) { 8574 Error(StringLoc, "invalid dst_unused value"); 8575 return MatchOperand_ParseFail; 8576 } 8577 8578 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8579 return MatchOperand_Success; 8580 } 8581 8582 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8583 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8584 } 8585 8586 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8587 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8588 } 8589 8590 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8591 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8592 } 8593 8594 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8595 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8596 } 8597 8598 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8599 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8600 } 8601 8602 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8603 uint64_t BasicInstType, 8604 bool SkipDstVcc, 8605 bool SkipSrcVcc) { 8606 using namespace llvm::AMDGPU::SDWA; 8607 8608 OptionalImmIndexMap OptionalIdx; 8609 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8610 bool SkippedVcc = false; 8611 8612 unsigned I = 1; 8613 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8614 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8615 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8616 } 8617 8618 for (unsigned E = Operands.size(); I != E; ++I) { 8619 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8620 if (SkipVcc && !SkippedVcc && Op.isReg() && 8621 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8622 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8623 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8624 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8625 // Skip VCC only if we didn't skip it on previous iteration. 8626 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
8627 if (BasicInstType == SIInstrFlags::VOP2 && 8628 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8629 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8630 SkippedVcc = true; 8631 continue; 8632 } else if (BasicInstType == SIInstrFlags::VOPC && 8633 Inst.getNumOperands() == 0) { 8634 SkippedVcc = true; 8635 continue; 8636 } 8637 } 8638 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8639 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8640 } else if (Op.isImm()) { 8641 // Handle optional arguments 8642 OptionalIdx[Op.getImmTy()] = I; 8643 } else { 8644 llvm_unreachable("Invalid operand type"); 8645 } 8646 SkippedVcc = false; 8647 } 8648 8649 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8650 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8651 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8652 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8653 switch (BasicInstType) { 8654 case SIInstrFlags::VOP1: 8655 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8656 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8657 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8658 } 8659 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8660 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8661 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8662 break; 8663 8664 case SIInstrFlags::VOP2: 8665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8666 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8667 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8668 } 8669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8670 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8671 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8673 break; 8674 8675 case SIInstrFlags::VOPC: 8676 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8678 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8679 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8680 break; 8681 8682 default: 8683 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8684 } 8685 } 8686 8687 // special case v_mac_{f16, f32}: 8688 // it has src2 register operand that is tied to dst operand 8689 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8690 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8691 auto it = Inst.begin(); 8692 std::advance( 8693 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8694 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8695 } 8696 } 8697 8698 //===----------------------------------------------------------------------===// 8699 // mAI 8700 //===----------------------------------------------------------------------===// 8701 8702 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8703 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8704 } 8705 8706 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8707 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8708 } 8709 8710 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8711 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8712 } 8713 8714 /// Force static initialization. 8715 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8716 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8717 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8718 } 8719 8720 #define GET_REGISTER_MATCHER 8721 #define GET_MATCHER_IMPLEMENTATION 8722 #define GET_MNEMONIC_SPELL_CHECKER 8723 #define GET_MNEMONIC_CHECKER 8724 #include "AMDGPUGenAsmMatcher.inc" 8725 8726 // This function should be defined after auto-generated include so that we have 8727 // MatchClassKind enum defined 8728 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8729 unsigned Kind) { 8730 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8731 // But MatchInstructionImpl() expects to meet token and fails to validate 8732 // operand. This method checks if we are given immediate operand but expect to 8733 // get corresponding token. 8734 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8735 switch (Kind) { 8736 case MCK_addr64: 8737 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8738 case MCK_gds: 8739 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8740 case MCK_lds: 8741 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8742 case MCK_idxen: 8743 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8744 case MCK_offen: 8745 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8746 case MCK_SSrcB32: 8747 // When operands have expression values, they will return true for isToken, 8748 // because it is not possible to distinguish between a token and an 8749 // expression at parse time. MatchInstructionImpl() will always try to 8750 // match an operand as a token, when isToken returns true, and when the 8751 // name of the expression is not a valid token, the match will fail, 8752 // so we need to handle it here. 8753 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8754 case MCK_SSrcF32: 8755 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8756 case MCK_SoppBrTarget: 8757 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8758 case MCK_VReg32OrOff: 8759 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8760 case MCK_InterpSlot: 8761 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8762 case MCK_Attr: 8763 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8764 case MCK_AttrChan: 8765 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8766 case MCK_ImmSMEMOffset: 8767 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8768 case MCK_SReg_64: 8769 case MCK_SReg_64_XEXEC: 8770 // Null is defined as a 32-bit register but 8771 // it should also be enabled with 64-bit operands. 8772 // The following code enables it for SReg_64 operands 8773 // used as source and destination. Remaining source 8774 // operands are handled in isInlinableImm. 8775 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8776 default: 8777 return Match_InvalidOperand; 8778 } 8779 } 8780 8781 //===----------------------------------------------------------------------===// 8782 // endpgm 8783 //===----------------------------------------------------------------------===// 8784 8785 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8786 SMLoc S = getLoc(); 8787 int64_t Imm = 0; 8788 8789 if (!parseExpr(Imm)) { 8790 // The operand is optional, if not present default to 0 8791 Imm = 0; 8792 } 8793 8794 if (!isUInt<16>(Imm)) { 8795 Error(S, "expected a 16-bit value"); 8796 return MatchOperand_ParseFail; 8797 } 8798 8799 Operands.push_back( 8800 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8801 return MatchOperand_Success; 8802 } 8803 8804 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8805