//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
          SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool
  isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() ||
           isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return
        isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return
        StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static
  void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }
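
  // Record a register use: bump the highest SGPR/VGPR/AGPR dword index seen so
  // far, which keeps the derived .kernel.*_count symbols up to date.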
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
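      // Expose the subtarget's ISA version to assembly source via pre-defined
      // symbols; the symbol names depend on the HSA ABI version in use.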
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean it.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool
                                                    AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool
  validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const
                           StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const
OperandVector &Operands, 1737 uint64_t BasicInstType, 1738 bool SkipDstVcc = false, 1739 bool SkipSrcVcc = false); 1740 1741 AMDGPUOperand::Ptr defaultBLGP() const; 1742 AMDGPUOperand::Ptr defaultCBSZ() const; 1743 AMDGPUOperand::Ptr defaultABID() const; 1744 1745 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1746 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1747 }; 1748 1749 struct OptionalOperand { 1750 const char *Name; 1751 AMDGPUOperand::ImmTy Type; 1752 bool IsBit; 1753 bool (*ConvertResult)(int64_t&); 1754 }; 1755 1756 } // end anonymous namespace 1757 1758 // May be called with integer type with equivalent bitwidth. 1759 static const fltSemantics *getFltSemantics(unsigned Size) { 1760 switch (Size) { 1761 case 4: 1762 return &APFloat::IEEEsingle(); 1763 case 8: 1764 return &APFloat::IEEEdouble(); 1765 case 2: 1766 return &APFloat::IEEEhalf(); 1767 default: 1768 llvm_unreachable("unsupported fp type"); 1769 } 1770 } 1771 1772 static const fltSemantics *getFltSemantics(MVT VT) { 1773 return getFltSemantics(VT.getSizeInBits() / 8); 1774 } 1775 1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1777 switch (OperandType) { 1778 case AMDGPU::OPERAND_REG_IMM_INT32: 1779 case AMDGPU::OPERAND_REG_IMM_FP32: 1780 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1781 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1782 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1783 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1785 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1786 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1787 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1788 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1789 case AMDGPU::OPERAND_KIMM32: 1790 return &APFloat::IEEEsingle(); 1791 case AMDGPU::OPERAND_REG_IMM_INT64: 1792 case AMDGPU::OPERAND_REG_IMM_FP64: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1796 return &APFloat::IEEEdouble(); 1797 case AMDGPU::OPERAND_REG_IMM_INT16: 1798 case AMDGPU::OPERAND_REG_IMM_FP16: 1799 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1800 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1809 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1810 case AMDGPU::OPERAND_KIMM16: 1811 return &APFloat::IEEEhalf(); 1812 default: 1813 llvm_unreachable("unsupported fp type"); 1814 } 1815 } 1816 1817 //===----------------------------------------------------------------------===// 1818 // Operand 1819 //===----------------------------------------------------------------------===// 1820 1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1822 bool Lost; 1823 1824 // Convert literal to single precision 1825 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1826 APFloat::rmNearestTiesToEven, 1827 &Lost); 1828 // We allow precision lost but not overflow or underflow 1829 if (Status != APFloat::opOK && 1830 Lost && 1831 ((Status & APFloat::opOverflow) != 0 || 1832 (Status & APFloat::opUnderflow) != 0)) { 1833 return false; 1834 } 1835 1836 return true; 1837 } 1838 1839 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1840 return isUIntN(Size, 
Val) || isIntN(Size, Val); 1841 } 1842 1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1844 if (VT.getScalarType() == MVT::i16) { 1845 // FP immediate values are broken. 1846 return isInlinableIntLiteral(Val); 1847 } 1848 1849 // f16/v2f16 operands work correctly for all values. 1850 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1851 } 1852 1853 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1854 1855 // This is a hack to enable named inline values like 1856 // shared_base with both 32-bit and 64-bit operands. 1857 // Note that these values are defined as 1858 // 32-bit operands only. 1859 if (isInlineValue()) { 1860 return true; 1861 } 1862 1863 if (!isImmTy(ImmTyNone)) { 1864 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1865 return false; 1866 } 1867 // TODO: We should avoid using host float here. It would be better to 1868 // check the float bit values which is what a few other places do. 1869 // We've had bot failures before due to weird NaN support on mips hosts. 1870 1871 APInt Literal(64, Imm.Val); 1872 1873 if (Imm.IsFPImm) { // We got fp literal token 1874 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1875 return AMDGPU::isInlinableLiteral64(Imm.Val, 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1880 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1881 return false; 1882 1883 if (type.getScalarSizeInBits() == 16) { 1884 return isInlineableLiteralOp16( 1885 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1886 type, AsmParser->hasInv2PiInlineImm()); 1887 } 1888 1889 // Check if single precision literal is inlinable 1890 return AMDGPU::isInlinableLiteral32( 1891 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1892 AsmParser->hasInv2PiInlineImm()); 1893 } 1894 1895 // We got int literal token. 1896 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1897 return AMDGPU::isInlinableLiteral64(Imm.Val, 1898 AsmParser->hasInv2PiInlineImm()); 1899 } 1900 1901 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1902 return false; 1903 } 1904 1905 if (type.getScalarSizeInBits() == 16) { 1906 return isInlineableLiteralOp16( 1907 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1908 type, AsmParser->hasInv2PiInlineImm()); 1909 } 1910 1911 return AMDGPU::isInlinableLiteral32( 1912 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1913 AsmParser->hasInv2PiInlineImm()); 1914 } 1915 1916 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1917 // Check that this immediate can be added as literal 1918 if (!isImmTy(ImmTyNone)) { 1919 return false; 1920 } 1921 1922 if (!Imm.IsFPImm) { 1923 // We got int literal token. 1924 1925 if (type == MVT::f64 && hasFPModifiers()) { 1926 // Cannot apply fp modifiers to int literals preserving the same semantics 1927 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1928 // disable these cases. 1929 return false; 1930 } 1931 1932 unsigned Size = type.getSizeInBits(); 1933 if (Size == 64) 1934 Size = 32; 1935 1936 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1937 // types. 
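// For example (illustrative): 0x100000000 is rejected here as a literal for a
// 64-bit integer operand, because only a 32-bit literal can actually be
// encoded and then extended by the hardware.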
1938 return isSafeTruncation(Imm.Val, Size);
1939 }
1940
1941 // We got fp literal token
1942 if (type == MVT::f64) { // Expected 64-bit fp operand
1943 // We would set the low 64 bits of the literal to zeroes but we accept these literals
1944 return true;
1945 }
1946
1947 if (type == MVT::i64) { // Expected 64-bit int operand
1948 // We don't allow fp literals in 64-bit integer instructions. It is
1949 // unclear how we should encode them.
1950 return false;
1951 }
1952
1953 // We allow fp literals with f16x2 operands assuming that the specified
1954 // literal goes into the lower half and the upper half is zero. We also
1955 // require that the literal may be losslessly converted to f16.
1956 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
1957 (type == MVT::v2i16)? MVT::i16 :
1958 (type == MVT::v2f32)? MVT::f32 : type;
1959
1960 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1961 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
1962 }
1963
1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
1965 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
1966 }
1967
1968 bool AMDGPUOperand::isVRegWithInputMods() const {
1969 return isRegClass(AMDGPU::VGPR_32RegClassID) ||
1970 // GFX90A allows DPP on 64-bit operands.
1971 (isRegClass(AMDGPU::VReg_64RegClassID) &&
1972 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
1973 }
1974
1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
1976 if (AsmParser->isVI())
1977 return isVReg32();
1978 else if (AsmParser->isGFX9Plus())
1979 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
1980 else
1981 return false;
1982 }
1983
1984 bool AMDGPUOperand::isSDWAFP16Operand() const {
1985 return isSDWAOperand(MVT::f16);
1986 }
1987
1988 bool AMDGPUOperand::isSDWAFP32Operand() const {
1989 return isSDWAOperand(MVT::f32);
1990 }
1991
1992 bool AMDGPUOperand::isSDWAInt16Operand() const {
1993 return isSDWAOperand(MVT::i16);
1994 }
1995
1996 bool AMDGPUOperand::isSDWAInt32Operand() const {
1997 return isSDWAOperand(MVT::i32);
1998 }
1999
2000 bool AMDGPUOperand::isBoolReg() const {
2001 auto FB = AsmParser->getFeatureBits();
2002 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
2003 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
2004 }
2005
2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
2007 {
2008 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2009 assert(Size == 2 || Size == 4 || Size == 8);
2010
2011 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
2012
2013 if (Imm.Mods.Abs) {
2014 Val &= ~FpSignMask;
2015 }
2016 if (Imm.Mods.Neg) {
2017 Val ^= FpSignMask;
2018 }
2019
2020 return Val;
2021 }
2022
2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
2024 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
2025 Inst.getNumOperands())) {
2026 addLiteralImmOperand(Inst, Imm.Val,
2027 ApplyModifiers &
2028 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
2029 } else {
2030 assert(!isImmTy(ImmTyNone) || !hasModifiers());
2031 Inst.addOperand(MCOperand::createImm(Imm.Val));
2032 setImmKindNone();
2033 }
2034 }
2035
2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
2037 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
2038 auto OpNum = Inst.getNumOperands();
2039 // Check that this operand accepts literals
2040 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
2041
2042
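// From here on, Val is emitted either verbatim (when it is a valid inline
// constant for this operand type) or as a 32- or 16-bit literal; the
// per-operand-type switches below handle both the fp-token and int-token cases.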
if (ApplyModifiers) { 2043 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2044 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2045 Val = applyInputFPModifiers(Val, Size); 2046 } 2047 2048 APInt Literal(64, Val); 2049 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2050 2051 if (Imm.IsFPImm) { // We got fp literal token 2052 switch (OpTy) { 2053 case AMDGPU::OPERAND_REG_IMM_INT64: 2054 case AMDGPU::OPERAND_REG_IMM_FP64: 2055 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2056 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2057 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2058 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2059 AsmParser->hasInv2PiInlineImm())) { 2060 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2061 setImmKindConst(); 2062 return; 2063 } 2064 2065 // Non-inlineable 2066 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2067 // For fp operands we check if low 32 bits are zeros 2068 if (Literal.getLoBits(32) != 0) { 2069 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2070 "Can't encode literal as exact 64-bit floating-point operand. " 2071 "Low 32-bits will be set to zero"); 2072 } 2073 2074 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2075 setImmKindLiteral(); 2076 return; 2077 } 2078 2079 // We don't allow fp literals in 64-bit integer instructions. It is 2080 // unclear how we should encode them. This case should be checked earlier 2081 // in predicate methods (isLiteralImm()) 2082 llvm_unreachable("fp literal in 64-bit integer instruction."); 2083 2084 case AMDGPU::OPERAND_REG_IMM_INT32: 2085 case AMDGPU::OPERAND_REG_IMM_FP32: 2086 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2087 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2088 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2090 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2091 case AMDGPU::OPERAND_REG_IMM_INT16: 2092 case AMDGPU::OPERAND_REG_IMM_FP16: 2093 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2094 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2095 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2097 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2102 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2103 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2105 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2106 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2107 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2108 case AMDGPU::OPERAND_KIMM32: 2109 case AMDGPU::OPERAND_KIMM16: { 2110 bool lost; 2111 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2112 // Convert literal to single precision 2113 FPLiteral.convert(*getOpFltSemantics(OpTy), 2114 APFloat::rmNearestTiesToEven, &lost); 2115 // We allow precision lost but not overflow or underflow. This should be 2116 // checked earlier in isLiteralImm() 2117 2118 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2119 Inst.addOperand(MCOperand::createImm(ImmVal)); 2120 setImmKindLiteral(); 2121 return; 2122 } 2123 default: 2124 llvm_unreachable("invalid operand size"); 2125 } 2126 2127 return; 2128 } 2129 2130 // We got int literal token. 2131 // Only sign extend inline immediates. 
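// For example (illustrative): for a 32-bit operand, -1 is kept as an inline
// constant, while 0x12345678 is not inlinable and is emitted as a 32-bit
// literal (truncated below via Val & 0xffffffff).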
2132 switch (OpTy) { 2133 case AMDGPU::OPERAND_REG_IMM_INT32: 2134 case AMDGPU::OPERAND_REG_IMM_FP32: 2135 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2136 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2137 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2138 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2139 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2140 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2141 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2142 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2143 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2144 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2145 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2146 if (isSafeTruncation(Val, 32) && 2147 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2148 AsmParser->hasInv2PiInlineImm())) { 2149 Inst.addOperand(MCOperand::createImm(Val)); 2150 setImmKindConst(); 2151 return; 2152 } 2153 2154 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2155 setImmKindLiteral(); 2156 return; 2157 2158 case AMDGPU::OPERAND_REG_IMM_INT64: 2159 case AMDGPU::OPERAND_REG_IMM_FP64: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2163 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2164 Inst.addOperand(MCOperand::createImm(Val)); 2165 setImmKindConst(); 2166 return; 2167 } 2168 2169 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2170 setImmKindLiteral(); 2171 return; 2172 2173 case AMDGPU::OPERAND_REG_IMM_INT16: 2174 case AMDGPU::OPERAND_REG_IMM_FP16: 2175 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2176 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2177 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2179 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2180 if (isSafeTruncation(Val, 16) && 2181 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2182 AsmParser->hasInv2PiInlineImm())) { 2183 Inst.addOperand(MCOperand::createImm(Val)); 2184 setImmKindConst(); 2185 return; 2186 } 2187 2188 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2189 setImmKindLiteral(); 2190 return; 2191 2192 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2193 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2194 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2195 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2196 assert(isSafeTruncation(Val, 16)); 2197 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2198 AsmParser->hasInv2PiInlineImm())); 2199 2200 Inst.addOperand(MCOperand::createImm(Val)); 2201 return; 2202 } 2203 case AMDGPU::OPERAND_KIMM32: 2204 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2205 setImmKindNone(); 2206 return; 2207 case AMDGPU::OPERAND_KIMM16: 2208 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2209 setImmKindNone(); 2210 return; 2211 default: 2212 llvm_unreachable("invalid operand size"); 2213 } 2214 } 2215 2216 template <unsigned Bitwidth> 2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2218 APInt Literal(64, Imm.Val); 2219 setImmKindNone(); 2220 2221 if (!Imm.IsFPImm) { 2222 // We got int literal token. 
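// Only the low Bitwidth bits of the integer token are encoded.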
2223 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2224 return; 2225 } 2226 2227 bool Lost; 2228 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2229 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2230 APFloat::rmNearestTiesToEven, &Lost); 2231 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2232 } 2233 2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2235 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2236 } 2237 2238 static bool isInlineValue(unsigned Reg) { 2239 switch (Reg) { 2240 case AMDGPU::SRC_SHARED_BASE: 2241 case AMDGPU::SRC_SHARED_LIMIT: 2242 case AMDGPU::SRC_PRIVATE_BASE: 2243 case AMDGPU::SRC_PRIVATE_LIMIT: 2244 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2245 return true; 2246 case AMDGPU::SRC_VCCZ: 2247 case AMDGPU::SRC_EXECZ: 2248 case AMDGPU::SRC_SCC: 2249 return true; 2250 case AMDGPU::SGPR_NULL: 2251 return true; 2252 default: 2253 return false; 2254 } 2255 } 2256 2257 bool AMDGPUOperand::isInlineValue() const { 2258 return isRegKind() && ::isInlineValue(getReg()); 2259 } 2260 2261 //===----------------------------------------------------------------------===// 2262 // AsmParser 2263 //===----------------------------------------------------------------------===// 2264 2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2266 if (Is == IS_VGPR) { 2267 switch (RegWidth) { 2268 default: return -1; 2269 case 32: 2270 return AMDGPU::VGPR_32RegClassID; 2271 case 64: 2272 return AMDGPU::VReg_64RegClassID; 2273 case 96: 2274 return AMDGPU::VReg_96RegClassID; 2275 case 128: 2276 return AMDGPU::VReg_128RegClassID; 2277 case 160: 2278 return AMDGPU::VReg_160RegClassID; 2279 case 192: 2280 return AMDGPU::VReg_192RegClassID; 2281 case 224: 2282 return AMDGPU::VReg_224RegClassID; 2283 case 256: 2284 return AMDGPU::VReg_256RegClassID; 2285 case 512: 2286 return AMDGPU::VReg_512RegClassID; 2287 case 1024: 2288 return AMDGPU::VReg_1024RegClassID; 2289 } 2290 } else if (Is == IS_TTMP) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::TTMP_32RegClassID; 2295 case 64: 2296 return AMDGPU::TTMP_64RegClassID; 2297 case 128: 2298 return AMDGPU::TTMP_128RegClassID; 2299 case 256: 2300 return AMDGPU::TTMP_256RegClassID; 2301 case 512: 2302 return AMDGPU::TTMP_512RegClassID; 2303 } 2304 } else if (Is == IS_SGPR) { 2305 switch (RegWidth) { 2306 default: return -1; 2307 case 32: 2308 return AMDGPU::SGPR_32RegClassID; 2309 case 64: 2310 return AMDGPU::SGPR_64RegClassID; 2311 case 96: 2312 return AMDGPU::SGPR_96RegClassID; 2313 case 128: 2314 return AMDGPU::SGPR_128RegClassID; 2315 case 160: 2316 return AMDGPU::SGPR_160RegClassID; 2317 case 192: 2318 return AMDGPU::SGPR_192RegClassID; 2319 case 224: 2320 return AMDGPU::SGPR_224RegClassID; 2321 case 256: 2322 return AMDGPU::SGPR_256RegClassID; 2323 case 512: 2324 return AMDGPU::SGPR_512RegClassID; 2325 } 2326 } else if (Is == IS_AGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::AGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::AReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::AReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::AReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::AReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::AReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::AReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::AReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::AReg_512RegClassID; 
2347 case 1024: 2348 return AMDGPU::AReg_1024RegClassID; 2349 } 2350 } 2351 return -1; 2352 } 2353 2354 static unsigned getSpecialRegForName(StringRef RegName) { 2355 return StringSwitch<unsigned>(RegName) 2356 .Case("exec", AMDGPU::EXEC) 2357 .Case("vcc", AMDGPU::VCC) 2358 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2359 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2360 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2361 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2362 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2363 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2364 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2365 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2366 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2367 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2368 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2369 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2370 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2371 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2372 .Case("m0", AMDGPU::M0) 2373 .Case("vccz", AMDGPU::SRC_VCCZ) 2374 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2375 .Case("execz", AMDGPU::SRC_EXECZ) 2376 .Case("src_execz", AMDGPU::SRC_EXECZ) 2377 .Case("scc", AMDGPU::SRC_SCC) 2378 .Case("src_scc", AMDGPU::SRC_SCC) 2379 .Case("tba", AMDGPU::TBA) 2380 .Case("tma", AMDGPU::TMA) 2381 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2382 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2383 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2384 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2385 .Case("vcc_lo", AMDGPU::VCC_LO) 2386 .Case("vcc_hi", AMDGPU::VCC_HI) 2387 .Case("exec_lo", AMDGPU::EXEC_LO) 2388 .Case("exec_hi", AMDGPU::EXEC_HI) 2389 .Case("tma_lo", AMDGPU::TMA_LO) 2390 .Case("tma_hi", AMDGPU::TMA_HI) 2391 .Case("tba_lo", AMDGPU::TBA_LO) 2392 .Case("tba_hi", AMDGPU::TBA_HI) 2393 .Case("pc", AMDGPU::PC_REG) 2394 .Case("null", AMDGPU::SGPR_NULL) 2395 .Default(AMDGPU::NoRegister); 2396 } 2397 2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2399 SMLoc &EndLoc, bool RestoreOnFailure) { 2400 auto R = parseRegister(); 2401 if (!R) return true; 2402 assert(R->isReg()); 2403 RegNo = R->getReg(); 2404 StartLoc = R->getStartLoc(); 2405 EndLoc = R->getEndLoc(); 2406 return false; 2407 } 2408 2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2410 SMLoc &EndLoc) { 2411 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2412 } 2413 2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2415 SMLoc &StartLoc, 2416 SMLoc &EndLoc) { 2417 bool Result = 2418 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2419 bool PendingErrors = getParser().hasPendingError(); 2420 getParser().clearPendingErrors(); 2421 if (PendingErrors) 2422 return MatchOperand_ParseFail; 2423 if (Result) 2424 return MatchOperand_NoMatch; 2425 return MatchOperand_Success; 2426 } 2427 2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2429 RegisterKind RegKind, unsigned Reg1, 2430 SMLoc Loc) { 2431 switch (RegKind) { 2432 case IS_SPECIAL: 2433 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2434 Reg = AMDGPU::EXEC; 2435 RegWidth = 64; 2436 return true; 2437 } 2438 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2439 Reg = AMDGPU::FLAT_SCR; 2440 RegWidth = 64; 2441 return true; 2442 } 2443 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2444 Reg = AMDGPU::XNACK_MASK; 2445 RegWidth = 64; 
2446 return true; 2447 } 2448 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2449 Reg = AMDGPU::VCC; 2450 RegWidth = 64; 2451 return true; 2452 } 2453 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2454 Reg = AMDGPU::TBA; 2455 RegWidth = 64; 2456 return true; 2457 } 2458 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2459 Reg = AMDGPU::TMA; 2460 RegWidth = 64; 2461 return true; 2462 } 2463 Error(Loc, "register does not fit in the list"); 2464 return false; 2465 case IS_VGPR: 2466 case IS_SGPR: 2467 case IS_AGPR: 2468 case IS_TTMP: 2469 if (Reg1 != Reg + RegWidth / 32) { 2470 Error(Loc, "registers in a list must have consecutive indices"); 2471 return false; 2472 } 2473 RegWidth += 32; 2474 return true; 2475 default: 2476 llvm_unreachable("unexpected register kind"); 2477 } 2478 } 2479 2480 struct RegInfo { 2481 StringLiteral Name; 2482 RegisterKind Kind; 2483 }; 2484 2485 static constexpr RegInfo RegularRegisters[] = { 2486 {{"v"}, IS_VGPR}, 2487 {{"s"}, IS_SGPR}, 2488 {{"ttmp"}, IS_TTMP}, 2489 {{"acc"}, IS_AGPR}, 2490 {{"a"}, IS_AGPR}, 2491 }; 2492 2493 static bool isRegularReg(RegisterKind Kind) { 2494 return Kind == IS_VGPR || 2495 Kind == IS_SGPR || 2496 Kind == IS_TTMP || 2497 Kind == IS_AGPR; 2498 } 2499 2500 static const RegInfo* getRegularRegInfo(StringRef Str) { 2501 for (const RegInfo &Reg : RegularRegisters) 2502 if (Str.startswith(Reg.Name)) 2503 return &Reg; 2504 return nullptr; 2505 } 2506 2507 static bool getRegNum(StringRef Str, unsigned& Num) { 2508 return !Str.getAsInteger(10, Num); 2509 } 2510 2511 bool 2512 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2513 const AsmToken &NextToken) const { 2514 2515 // A list of consecutive registers: [s0,s1,s2,s3] 2516 if (Token.is(AsmToken::LBrac)) 2517 return true; 2518 2519 if (!Token.is(AsmToken::Identifier)) 2520 return false; 2521 2522 // A single register like s0 or a range of registers like s[0:1] 2523 2524 StringRef Str = Token.getString(); 2525 const RegInfo *Reg = getRegularRegInfo(Str); 2526 if (Reg) { 2527 StringRef RegName = Reg->Name; 2528 StringRef RegSuffix = Str.substr(RegName.size()); 2529 if (!RegSuffix.empty()) { 2530 unsigned Num; 2531 // A single register with an index: rXX 2532 if (getRegNum(RegSuffix, Num)) 2533 return true; 2534 } else { 2535 // A range of registers: r[XX:YY]. 2536 if (NextToken.is(AsmToken::LBrac)) 2537 return true; 2538 } 2539 } 2540 2541 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2542 } 2543 2544 bool 2545 AMDGPUAsmParser::isRegister() 2546 { 2547 return isRegister(getToken(), peekToken()); 2548 } 2549 2550 unsigned 2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2552 unsigned RegNum, 2553 unsigned RegWidth, 2554 SMLoc Loc) { 2555 2556 assert(isRegularReg(RegKind)); 2557 2558 unsigned AlignSize = 1; 2559 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2560 // SGPR and TTMP registers must be aligned. 2561 // Max required alignment is 4 dwords. 
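// For example (illustrative): s[2:3] is accepted, while s[1:2] is rejected
// below because a 64-bit SGPR pair must start at an even register index.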
2562 AlignSize = std::min(RegWidth / 32, 4u); 2563 } 2564 2565 if (RegNum % AlignSize != 0) { 2566 Error(Loc, "invalid register alignment"); 2567 return AMDGPU::NoRegister; 2568 } 2569 2570 unsigned RegIdx = RegNum / AlignSize; 2571 int RCID = getRegClass(RegKind, RegWidth); 2572 if (RCID == -1) { 2573 Error(Loc, "invalid or unsupported register size"); 2574 return AMDGPU::NoRegister; 2575 } 2576 2577 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2578 const MCRegisterClass RC = TRI->getRegClass(RCID); 2579 if (RegIdx >= RC.getNumRegs()) { 2580 Error(Loc, "register index is out of range"); 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 return RC.getRegister(RegIdx); 2585 } 2586 2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2588 int64_t RegLo, RegHi; 2589 if (!skipToken(AsmToken::LBrac, "missing register index")) 2590 return false; 2591 2592 SMLoc FirstIdxLoc = getLoc(); 2593 SMLoc SecondIdxLoc; 2594 2595 if (!parseExpr(RegLo)) 2596 return false; 2597 2598 if (trySkipToken(AsmToken::Colon)) { 2599 SecondIdxLoc = getLoc(); 2600 if (!parseExpr(RegHi)) 2601 return false; 2602 } else { 2603 RegHi = RegLo; 2604 } 2605 2606 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2607 return false; 2608 2609 if (!isUInt<32>(RegLo)) { 2610 Error(FirstIdxLoc, "invalid register index"); 2611 return false; 2612 } 2613 2614 if (!isUInt<32>(RegHi)) { 2615 Error(SecondIdxLoc, "invalid register index"); 2616 return false; 2617 } 2618 2619 if (RegLo > RegHi) { 2620 Error(FirstIdxLoc, "first register index should not exceed second index"); 2621 return false; 2622 } 2623 2624 Num = static_cast<unsigned>(RegLo); 2625 RegWidth = 32 * ((RegHi - RegLo) + 1); 2626 return true; 2627 } 2628 2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2630 unsigned &RegNum, unsigned &RegWidth, 2631 SmallVectorImpl<AsmToken> &Tokens) { 2632 assert(isToken(AsmToken::Identifier)); 2633 unsigned Reg = getSpecialRegForName(getTokenStr()); 2634 if (Reg) { 2635 RegNum = 0; 2636 RegWidth = 32; 2637 RegKind = IS_SPECIAL; 2638 Tokens.push_back(getToken()); 2639 lex(); // skip register name 2640 } 2641 return Reg; 2642 } 2643 2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2645 unsigned &RegNum, unsigned &RegWidth, 2646 SmallVectorImpl<AsmToken> &Tokens) { 2647 assert(isToken(AsmToken::Identifier)); 2648 StringRef RegName = getTokenStr(); 2649 auto Loc = getLoc(); 2650 2651 const RegInfo *RI = getRegularRegInfo(RegName); 2652 if (!RI) { 2653 Error(Loc, "invalid register name"); 2654 return AMDGPU::NoRegister; 2655 } 2656 2657 Tokens.push_back(getToken()); 2658 lex(); // skip register name 2659 2660 RegKind = RI->Kind; 2661 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2662 if (!RegSuffix.empty()) { 2663 // Single 32-bit register: vXX. 2664 if (!getRegNum(RegSuffix, RegNum)) { 2665 Error(Loc, "invalid register index"); 2666 return AMDGPU::NoRegister; 2667 } 2668 RegWidth = 32; 2669 } else { 2670 // Range of registers: v[XX:YY]. ":YY" is optional. 
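// For example (illustrative): v[2:5] yields RegNum = 2 and RegWidth = 128,
// while v[7] yields RegNum = 7 and RegWidth = 32.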
2671 if (!ParseRegRange(RegNum, RegWidth)) 2672 return AMDGPU::NoRegister; 2673 } 2674 2675 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2676 } 2677 2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2679 unsigned &RegWidth, 2680 SmallVectorImpl<AsmToken> &Tokens) { 2681 unsigned Reg = AMDGPU::NoRegister; 2682 auto ListLoc = getLoc(); 2683 2684 if (!skipToken(AsmToken::LBrac, 2685 "expected a register or a list of registers")) { 2686 return AMDGPU::NoRegister; 2687 } 2688 2689 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2690 2691 auto Loc = getLoc(); 2692 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2693 return AMDGPU::NoRegister; 2694 if (RegWidth != 32) { 2695 Error(Loc, "expected a single 32-bit register"); 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 for (; trySkipToken(AsmToken::Comma); ) { 2700 RegisterKind NextRegKind; 2701 unsigned NextReg, NextRegNum, NextRegWidth; 2702 Loc = getLoc(); 2703 2704 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2705 NextRegNum, NextRegWidth, 2706 Tokens)) { 2707 return AMDGPU::NoRegister; 2708 } 2709 if (NextRegWidth != 32) { 2710 Error(Loc, "expected a single 32-bit register"); 2711 return AMDGPU::NoRegister; 2712 } 2713 if (NextRegKind != RegKind) { 2714 Error(Loc, "registers in a list must be of the same kind"); 2715 return AMDGPU::NoRegister; 2716 } 2717 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2718 return AMDGPU::NoRegister; 2719 } 2720 2721 if (!skipToken(AsmToken::RBrac, 2722 "expected a comma or a closing square bracket")) { 2723 return AMDGPU::NoRegister; 2724 } 2725 2726 if (isRegularReg(RegKind)) 2727 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2728 2729 return Reg; 2730 } 2731 2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2733 unsigned &RegNum, unsigned &RegWidth, 2734 SmallVectorImpl<AsmToken> &Tokens) { 2735 auto Loc = getLoc(); 2736 Reg = AMDGPU::NoRegister; 2737 2738 if (isToken(AsmToken::Identifier)) { 2739 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2740 if (Reg == AMDGPU::NoRegister) 2741 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2742 } else { 2743 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2744 } 2745 2746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2747 if (Reg == AMDGPU::NoRegister) { 2748 assert(Parser.hasPendingError()); 2749 return false; 2750 } 2751 2752 if (!subtargetHasRegister(*TRI, Reg)) { 2753 if (Reg == AMDGPU::SGPR_NULL) { 2754 Error(Loc, "'null' operand is not supported on this GPU"); 2755 } else { 2756 Error(Loc, "register not available on this GPU"); 2757 } 2758 return false; 2759 } 2760 2761 return true; 2762 } 2763 2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2765 unsigned &RegNum, unsigned &RegWidth, 2766 bool RestoreOnFailure /*=false*/) { 2767 Reg = AMDGPU::NoRegister; 2768 2769 SmallVector<AsmToken, 1> Tokens; 2770 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2771 if (RestoreOnFailure) { 2772 while (!Tokens.empty()) { 2773 getLexer().UnLex(Tokens.pop_back_val()); 2774 } 2775 } 2776 return true; 2777 } 2778 return false; 2779 } 2780 2781 Optional<StringRef> 2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2783 switch (RegKind) { 2784 case IS_VGPR: 2785 return StringRef(".amdgcn.next_free_vgpr"); 2786 case IS_SGPR: 2787 return StringRef(".amdgcn.next_free_sgpr"); 2788 default: 2789 return None; 2790 } 2791 } 2792 2793 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2794 auto SymbolName = getGprCountSymbolName(RegKind); 2795 assert(SymbolName && "initializing invalid register kind"); 2796 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2797 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2798 } 2799 2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2801 unsigned DwordRegIndex, 2802 unsigned RegWidth) { 2803 // Symbols are only defined for GCN targets 2804 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2805 return true; 2806 2807 auto SymbolName = getGprCountSymbolName(RegKind); 2808 if (!SymbolName) 2809 return true; 2810 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2811 2812 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2813 int64_t OldCount; 2814 2815 if (!Sym->isVariable()) 2816 return !Error(getLoc(), 2817 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2818 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2819 return !Error( 2820 getLoc(), 2821 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2822 2823 if (OldCount <= NewMax) 2824 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2825 2826 return true; 2827 } 2828 2829 std::unique_ptr<AMDGPUOperand> 2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2831 const auto &Tok = getToken(); 2832 SMLoc StartLoc = Tok.getLoc(); 2833 SMLoc EndLoc = Tok.getEndLoc(); 2834 RegisterKind RegKind; 2835 unsigned Reg, RegNum, RegWidth; 2836 2837 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2838 return nullptr; 2839 } 2840 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2841 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2842 return nullptr; 2843 } else 2844 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2845 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2846 } 2847 2848 OperandMatchResultTy 2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2850 // TODO: add syntactic sugar for 1/(2*PI) 2851 2852 assert(!isRegister()); 2853 assert(!isModifier()); 2854 2855 const auto& Tok = getToken(); 2856 const auto& NextTok = peekToken(); 2857 bool IsReal = Tok.is(AsmToken::Real); 2858 SMLoc S = getLoc(); 2859 bool Negate = false; 2860 2861 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2862 lex(); 2863 IsReal = true; 2864 Negate = true; 2865 } 2866 2867 if (IsReal) { 2868 // Floating-point expressions are not supported. 2869 // Can only allow floating-point literals with an 2870 // optional sign. 2871 2872 StringRef Num = getTokenStr(); 2873 lex(); 2874 2875 APFloat RealVal(APFloat::IEEEdouble()); 2876 auto roundMode = APFloat::rmNearestTiesToEven; 2877 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2878 return MatchOperand_ParseFail; 2879 } 2880 if (Negate) 2881 RealVal.changeSign(); 2882 2883 Operands.push_back( 2884 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2885 AMDGPUOperand::ImmTyNone, true)); 2886 2887 return MatchOperand_Success; 2888 2889 } else { 2890 int64_t IntVal; 2891 const MCExpr *Expr; 2892 SMLoc S = getLoc(); 2893 2894 if (HasSP3AbsModifier) { 2895 // This is a workaround for handling expressions 2896 // as arguments of SP3 'abs' modifier, for example: 2897 // |1.0| 2898 // |-1| 2899 // |1+x| 2900 // This syntax is not compatible with syntax of standard 2901 // MC expressions (due to the trailing '|'). 
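// parsePrimaryExpr() is used instead of parseExpression() so that the
// trailing '|' is left for the caller to consume rather than being parsed
// as a bitwise-or operator.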
2902 SMLoc EndLoc; 2903 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2904 return MatchOperand_ParseFail; 2905 } else { 2906 if (Parser.parseExpression(Expr)) 2907 return MatchOperand_ParseFail; 2908 } 2909 2910 if (Expr->evaluateAsAbsolute(IntVal)) { 2911 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2912 } else { 2913 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2914 } 2915 2916 return MatchOperand_Success; 2917 } 2918 2919 return MatchOperand_NoMatch; 2920 } 2921 2922 OperandMatchResultTy 2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2924 if (!isRegister()) 2925 return MatchOperand_NoMatch; 2926 2927 if (auto R = parseRegister()) { 2928 assert(R->isReg()); 2929 Operands.push_back(std::move(R)); 2930 return MatchOperand_Success; 2931 } 2932 return MatchOperand_ParseFail; 2933 } 2934 2935 OperandMatchResultTy 2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2937 auto res = parseReg(Operands); 2938 if (res != MatchOperand_NoMatch) { 2939 return res; 2940 } else if (isModifier()) { 2941 return MatchOperand_NoMatch; 2942 } else { 2943 return parseImm(Operands, HasSP3AbsMod); 2944 } 2945 } 2946 2947 bool 2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2949 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2950 const auto &str = Token.getString(); 2951 return str == "abs" || str == "neg" || str == "sext"; 2952 } 2953 return false; 2954 } 2955 2956 bool 2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2958 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2959 } 2960 2961 bool 2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2963 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2964 } 2965 2966 bool 2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2968 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2969 } 2970 2971 // Check if this is an operand modifier or an opcode modifier 2972 // which may look like an expression but it is not. We should 2973 // avoid parsing these modifiers as expressions. Currently 2974 // recognized sequences are: 2975 // |...| 2976 // abs(...) 2977 // neg(...) 2978 // sext(...) 2979 // -reg 2980 // -|...| 2981 // -abs(...) 2982 // name:... 2983 // Note that simple opcode modifiers like 'gds' may be parsed as 2984 // expressions; this is a special case. See getExpressionAsToken. 2985 // 2986 bool 2987 AMDGPUAsmParser::isModifier() { 2988 2989 AsmToken Tok = getToken(); 2990 AsmToken NextToken[2]; 2991 peekTokens(NextToken); 2992 2993 return isOperandModifier(Tok, NextToken[0]) || 2994 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2995 isOpcodeModifierWithVal(Tok, NextToken[0]); 2996 } 2997 2998 // Check if the current token is an SP3 'neg' modifier. 2999 // Currently this modifier is allowed in the following context: 3000 // 3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3002 // 2. Before an 'abs' modifier: -abs(...) 3003 // 3. Before an SP3 'abs' modifier: -|...| 3004 // 3005 // In all other cases "-" is handled as a part 3006 // of an expression that follows the sign. 
3007 //
3008 // Note: When "-" is followed by an integer literal N,
3009 // this is interpreted as integer negation rather
3010 // than a floating-point NEG modifier applied to N.
3011 // Besides being counter-intuitive, such use of a floating-point
3012 // NEG modifier would have resulted in a different meaning
3013 // of integer literals used with VOP1/2/C and VOP3,
3014 // for example:
3015 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3016 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3017 // Negative fp literals with a preceding "-" are
3018 // handled likewise for uniformity.
3019 //
3020 bool
3021 AMDGPUAsmParser::parseSP3NegModifier() {
3022
3023 AsmToken NextToken[2];
3024 peekTokens(NextToken);
3025
3026 if (isToken(AsmToken::Minus) &&
3027 (isRegister(NextToken[0], NextToken[1]) ||
3028 NextToken[0].is(AsmToken::Pipe) ||
3029 isId(NextToken[0], "abs"))) {
3030 lex();
3031 return true;
3032 }
3033
3034 return false;
3035 }
3036
3037 OperandMatchResultTy
3038 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3039 bool AllowImm) {
3040 bool Neg, SP3Neg;
3041 bool Abs, SP3Abs;
3042 SMLoc Loc;
3043
3044 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3045 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3046 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3047 return MatchOperand_ParseFail;
3048 }
3049
3050 SP3Neg = parseSP3NegModifier();
3051
3052 Loc = getLoc();
3053 Neg = trySkipId("neg");
3054 if (Neg && SP3Neg) {
3055 Error(Loc, "expected register or immediate");
3056 return MatchOperand_ParseFail;
3057 }
3058 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3059 return MatchOperand_ParseFail;
3060
3061 Abs = trySkipId("abs");
3062 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3063 return MatchOperand_ParseFail;
3064
3065 Loc = getLoc();
3066 SP3Abs = trySkipToken(AsmToken::Pipe);
3067 if (Abs && SP3Abs) {
3068 Error(Loc, "expected register or immediate");
3069 return MatchOperand_ParseFail;
3070 }
3071
3072 OperandMatchResultTy Res;
3073 if (AllowImm) {
3074 Res = parseRegOrImm(Operands, SP3Abs);
3075 } else {
3076 Res = parseReg(Operands);
3077 }
3078 if (Res != MatchOperand_Success) {
3079 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3080 } 3081 3082 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3083 return MatchOperand_ParseFail; 3084 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3085 return MatchOperand_ParseFail; 3086 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3087 return MatchOperand_ParseFail; 3088 3089 AMDGPUOperand::Modifiers Mods; 3090 Mods.Abs = Abs || SP3Abs; 3091 Mods.Neg = Neg || SP3Neg; 3092 3093 if (Mods.hasFPModifiers()) { 3094 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3095 if (Op.isExpr()) { 3096 Error(Op.getStartLoc(), "expected an absolute expression"); 3097 return MatchOperand_ParseFail; 3098 } 3099 Op.setModifiers(Mods); 3100 } 3101 return MatchOperand_Success; 3102 } 3103 3104 OperandMatchResultTy 3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3106 bool AllowImm) { 3107 bool Sext = trySkipId("sext"); 3108 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3109 return MatchOperand_ParseFail; 3110 3111 OperandMatchResultTy Res; 3112 if (AllowImm) { 3113 Res = parseRegOrImm(Operands); 3114 } else { 3115 Res = parseReg(Operands); 3116 } 3117 if (Res != MatchOperand_Success) { 3118 return Sext? MatchOperand_ParseFail : Res; 3119 } 3120 3121 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3122 return MatchOperand_ParseFail; 3123 3124 AMDGPUOperand::Modifiers Mods; 3125 Mods.Sext = Sext; 3126 3127 if (Mods.hasIntModifiers()) { 3128 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3129 if (Op.isExpr()) { 3130 Error(Op.getStartLoc(), "expected an absolute expression"); 3131 return MatchOperand_ParseFail; 3132 } 3133 Op.setModifiers(Mods); 3134 } 3135 3136 return MatchOperand_Success; 3137 } 3138 3139 OperandMatchResultTy 3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3141 return parseRegOrImmWithFPInputMods(Operands, false); 3142 } 3143 3144 OperandMatchResultTy 3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3146 return parseRegOrImmWithIntInputMods(Operands, false); 3147 } 3148 3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3150 auto Loc = getLoc(); 3151 if (trySkipId("off")) { 3152 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3153 AMDGPUOperand::ImmTyOff, false)); 3154 return MatchOperand_Success; 3155 } 3156 3157 if (!isRegister()) 3158 return MatchOperand_NoMatch; 3159 3160 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3161 if (Reg) { 3162 Operands.push_back(std::move(Reg)); 3163 return MatchOperand_Success; 3164 } 3165 3166 return MatchOperand_ParseFail; 3167 3168 } 3169 3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3171 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3172 3173 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3174 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3175 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3176 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3177 return Match_InvalidOperand; 3178 3179 if ((TSFlags & SIInstrFlags::VOP3) && 3180 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3181 getForcedEncodingSize() != 64) 3182 return Match_PreferE32; 3183 3184 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3185 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3186 // v_mac_f32/16 allow only dst_sel == DWORD; 3187 auto OpNum = 3188 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3189 const auto &Op = Inst.getOperand(OpNum);
3190 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3191 return Match_InvalidOperand;
3192 }
3193 }
3194
3195 return Match_Success;
3196 }
3197
3198 static ArrayRef<unsigned> getAllVariants() {
3199 static const unsigned Variants[] = {
3200 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3201 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3202 };
3203
3204 return makeArrayRef(Variants);
3205 }
3206
3207 // Which asm variants we should check
3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3209 if (getForcedEncodingSize() == 32) {
3210 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3211 return makeArrayRef(Variants);
3212 }
3213
3214 if (isForcedVOP3()) {
3215 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3216 return makeArrayRef(Variants);
3217 }
3218
3219 if (isForcedSDWA()) {
3220 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3221 AMDGPUAsmVariants::SDWA9};
3222 return makeArrayRef(Variants);
3223 }
3224
3225 if (isForcedDPP()) {
3226 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3227 return makeArrayRef(Variants);
3228 }
3229
3230 return getAllVariants();
3231 }
3232
3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3234 if (getForcedEncodingSize() == 32)
3235 return "e32";
3236
3237 if (isForcedVOP3())
3238 return "e64";
3239
3240 if (isForcedSDWA())
3241 return "sdwa";
3242
3243 if (isForcedDPP())
3244 return "dpp";
3245
3246 return "";
3247 }
3248
3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3250 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3251 const unsigned Num = Desc.getNumImplicitUses();
3252 for (unsigned i = 0; i < Num; ++i) {
3253 unsigned Reg = Desc.ImplicitUses[i];
3254 switch (Reg) {
3255 case AMDGPU::FLAT_SCR:
3256 case AMDGPU::VCC:
3257 case AMDGPU::VCC_LO:
3258 case AMDGPU::VCC_HI:
3259 case AMDGPU::M0:
3260 return Reg;
3261 default:
3262 break;
3263 }
3264 }
3265 return AMDGPU::NoRegister;
3266 }
3267
3268 // NB: This code is correct only when used to check constant
3269 // bus limitations because GFX7 supports no f16 inline constants.
3270 // Note that there are no cases when a GFX7 opcode violates
3271 // constant bus limitations due to the use of an f16 constant.
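// For example (illustrative): an fp16 operand holding 1.0 (0x3C00) is an
// inline constant, so usesConstantBus() does not count it against the
// constant bus limit.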
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3273 unsigned OpIdx) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 3276 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3277 return false; 3278 } 3279 3280 const MCOperand &MO = Inst.getOperand(OpIdx); 3281 3282 int64_t Val = MO.getImm(); 3283 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3284 3285 switch (OpSize) { // expected operand size 3286 case 8: 3287 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3288 case 4: 3289 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3290 case 2: { 3291 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3292 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3293 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3294 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3295 return AMDGPU::isInlinableIntLiteral(Val); 3296 3297 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3298 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3299 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3300 return AMDGPU::isInlinableIntLiteralV216(Val); 3301 3302 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3303 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3304 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3305 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3306 3307 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3308 } 3309 default: 3310 llvm_unreachable("invalid operand size"); 3311 } 3312 } 3313 3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3315 if (!isGFX10Plus()) 3316 return 1; 3317 3318 switch (Opcode) { 3319 // 64-bit shift instructions can use only one scalar value input 3320 case AMDGPU::V_LSHLREV_B64_e64: 3321 case AMDGPU::V_LSHLREV_B64_gfx10: 3322 case AMDGPU::V_LSHRREV_B64_e64: 3323 case AMDGPU::V_LSHRREV_B64_gfx10: 3324 case AMDGPU::V_ASHRREV_I64_e64: 3325 case AMDGPU::V_ASHRREV_I64_gfx10: 3326 case AMDGPU::V_LSHL_B64_e64: 3327 case AMDGPU::V_LSHR_B64_e64: 3328 case AMDGPU::V_ASHR_I64_e64: 3329 return 1; 3330 default: 3331 return 2; 3332 } 3333 } 3334 3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3336 const MCOperand &MO = Inst.getOperand(OpIdx); 3337 if (MO.isImm()) { 3338 return !isInlineConstant(Inst, OpIdx); 3339 } else if (MO.isReg()) { 3340 auto Reg = MO.getReg(); 3341 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3342 auto PReg = mc2PseudoReg(Reg); 3343 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3344 } else { 3345 return true; 3346 } 3347 } 3348 3349 bool 3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3351 const OperandVector &Operands) { 3352 const unsigned Opcode = Inst.getOpcode(); 3353 const MCInstrDesc &Desc = MII.get(Opcode); 3354 unsigned LastSGPR = AMDGPU::NoRegister; 3355 unsigned ConstantBusUseCount = 0; 3356 unsigned NumLiterals = 0; 3357 unsigned LiteralSize; 3358 3359 if (Desc.TSFlags & 3360 (SIInstrFlags::VOPC | 3361 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3362 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3363 SIInstrFlags::SDWA)) { 3364 // Check special imm operands (used by madmk, etc) 3365 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3366 ++NumLiterals; 3367 LiteralSize = 4; 3368 } 3369 3370 SmallDenseSet<unsigned> SGPRsUsed; 3371 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3372 if (SGPRUsed != AMDGPU::NoRegister) { 3373 SGPRsUsed.insert(SGPRUsed); 3374 ++ConstantBusUseCount; 3375 } 3376 3377 
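// Example (illustrative): with a constant bus limit of 1, an instruction
// whose sources read both s0 and s1 needs two constant bus slots and is
// rejected below, while reading s0 twice counts as a single use.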
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3378 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3379 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3380
3381 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3382
3383 for (int OpIdx : OpIndices) {
3384 if (OpIdx == -1) break;
3385
3386 const MCOperand &MO = Inst.getOperand(OpIdx);
3387 if (usesConstantBus(Inst, OpIdx)) {
3388 if (MO.isReg()) {
3389 LastSGPR = mc2PseudoReg(MO.getReg());
3390 // Pairs of registers with a partial intersection like these
3391 // s0, s[0:1]
3392 // flat_scratch_lo, flat_scratch
3393 // flat_scratch_lo, flat_scratch_hi
3394 // are theoretically valid but they are disabled anyway.
3395 // Note that this code mimics SIInstrInfo::verifyInstruction.
3396 if (!SGPRsUsed.count(LastSGPR)) {
3397 SGPRsUsed.insert(LastSGPR);
3398 ++ConstantBusUseCount;
3399 }
3400 } else { // Expression or a literal
3401
3402 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3403 continue; // special operand like VINTERP attr_chan
3404
3405 // An instruction may use only one literal.
3406 // This has been validated in a previous step.
3407 // See validateVOPLiteral.
3408 // This literal may be used as more than one operand.
3409 // If all these operands are of the same size,
3410 // this literal counts as one scalar value.
3411 // Otherwise it counts as 2 scalar values.
3412 // See "GFX10 Shader Programming", section 3.6.2.3.
3413
3414 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3415 if (Size < 4) Size = 4;
3416
3417 if (NumLiterals == 0) {
3418 NumLiterals = 1;
3419 LiteralSize = Size;
3420 } else if (LiteralSize != Size) {
3421 NumLiterals = 2;
3422 }
3423 }
3424 }
3425 }
3426 }
3427 ConstantBusUseCount += NumLiterals;
3428
3429 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3430 return true;
3431
3432 SMLoc LitLoc = getLitLoc(Operands);
3433 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3434 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3435 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3436 return false; 3437 } 3438 3439 bool 3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3441 const OperandVector &Operands) { 3442 const unsigned Opcode = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opcode); 3444 3445 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3446 if (DstIdx == -1 || 3447 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3448 return true; 3449 } 3450 3451 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3452 3453 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3454 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3455 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3456 3457 assert(DstIdx != -1); 3458 const MCOperand &Dst = Inst.getOperand(DstIdx); 3459 assert(Dst.isReg()); 3460 3461 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3462 3463 for (int SrcIdx : SrcIndices) { 3464 if (SrcIdx == -1) break; 3465 const MCOperand &Src = Inst.getOperand(SrcIdx); 3466 if (Src.isReg()) { 3467 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3468 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3469 Error(getRegLoc(SrcReg, Operands), 3470 "destination must be different than all sources"); 3471 return false; 3472 } 3473 } 3474 } 3475 3476 return true; 3477 } 3478 3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3480 3481 const unsigned Opc = Inst.getOpcode(); 3482 const MCInstrDesc &Desc = MII.get(Opc); 3483 3484 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3485 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3486 assert(ClampIdx != -1); 3487 return Inst.getOperand(ClampIdx).getImm() == 0; 3488 } 3489 3490 return true; 3491 } 3492 3493 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3494 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3499 return None; 3500 3501 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3502 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3503 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3504 3505 assert(VDataIdx != -1); 3506 3507 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3508 return None; 3509 3510 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3511 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3512 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3513 if (DMask == 0) 3514 DMask = 1; 3515 3516 bool isPackedD16 = false; 3517 unsigned DataSize = 3518 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3519 if (hasPackedD16()) { 3520 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3521 isPackedD16 = D16Idx >= 0; 3522 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3523 DataSize = (DataSize + 1) / 2; 3524 } 3525 3526 if ((VDataSize / 4) == DataSize + TFESize) 3527 return None; 3528 3529 return StringRef(isPackedD16 3530 ? 
"image data size does not match dmask, d16 and tfe" 3531 : "image data size does not match dmask and tfe"); 3532 } 3533 3534 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3535 const unsigned Opc = Inst.getOpcode(); 3536 const MCInstrDesc &Desc = MII.get(Opc); 3537 3538 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3539 return true; 3540 3541 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3542 3543 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3544 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3545 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3546 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3547 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3548 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3549 3550 assert(VAddr0Idx != -1); 3551 assert(SrsrcIdx != -1); 3552 assert(SrsrcIdx > VAddr0Idx); 3553 3554 if (DimIdx == -1) 3555 return true; // intersect_ray 3556 3557 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3558 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3559 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3560 unsigned ActualAddrSize = 3561 IsNSA ? SrsrcIdx - VAddr0Idx 3562 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3563 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3564 3565 unsigned ExpectedAddrSize = 3566 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3567 3568 if (!IsNSA) { 3569 if (ExpectedAddrSize > 8) 3570 ExpectedAddrSize = 16; 3571 3572 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3573 // This provides backward compatibility for assembly created 3574 // before 160b/192b/224b types were directly supported. 3575 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3576 return true; 3577 } 3578 3579 return ActualAddrSize == ExpectedAddrSize; 3580 } 3581 3582 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3583 3584 const unsigned Opc = Inst.getOpcode(); 3585 const MCInstrDesc &Desc = MII.get(Opc); 3586 3587 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3588 return true; 3589 if (!Desc.mayLoad() || !Desc.mayStore()) 3590 return true; // Not atomic 3591 3592 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3593 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3594 3595 // This is an incomplete check because image_atomic_cmpswap 3596 // may only use 0x3 and 0xf while other atomic operations 3597 // may use 0x1 and 0x3. However these limitations are 3598 // verified when we check that dmask matches dst size. 3599 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3600 } 3601 3602 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3603 3604 const unsigned Opc = Inst.getOpcode(); 3605 const MCInstrDesc &Desc = MII.get(Opc); 3606 3607 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3608 return true; 3609 3610 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3611 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3612 3613 // GATHER4 instructions use dmask in a different fashion compared to 3614 // other MIMG instructions. The only useful DMASK values are 3615 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3616 // (red,red,red,red) etc.) The ISA document doesn't mention 3617 // this. 
3618 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3619 } 3620 3621 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3626 return true; 3627 3628 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3629 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3630 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3631 3632 if (!BaseOpcode->MSAA) 3633 return true; 3634 3635 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3636 assert(DimIdx != -1); 3637 3638 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3639 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3640 3641 return DimInfo->MSAA; 3642 } 3643 3644 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3645 { 3646 switch (Opcode) { 3647 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3648 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3649 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3650 return true; 3651 default: 3652 return false; 3653 } 3654 } 3655 3656 // movrels* opcodes should only allow VGPRS as src0. 3657 // This is specified in .td description for vop1/vop3, 3658 // but sdwa is handled differently. See isSDWAOperand. 3659 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3660 const OperandVector &Operands) { 3661 3662 const unsigned Opc = Inst.getOpcode(); 3663 const MCInstrDesc &Desc = MII.get(Opc); 3664 3665 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3666 return true; 3667 3668 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3669 assert(Src0Idx != -1); 3670 3671 SMLoc ErrLoc; 3672 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3673 if (Src0.isReg()) { 3674 auto Reg = mc2PseudoReg(Src0.getReg()); 3675 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3676 if (!isSGPR(Reg, TRI)) 3677 return true; 3678 ErrLoc = getRegLoc(Reg, Operands); 3679 } else { 3680 ErrLoc = getConstLoc(Operands); 3681 } 3682 3683 Error(ErrLoc, "source operand must be a VGPR"); 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3688 const OperandVector &Operands) { 3689 3690 const unsigned Opc = Inst.getOpcode(); 3691 3692 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3693 return true; 3694 3695 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3696 assert(Src0Idx != -1); 3697 3698 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3699 if (!Src0.isReg()) 3700 return true; 3701 3702 auto Reg = mc2PseudoReg(Src0.getReg()); 3703 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3704 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3705 Error(getRegLoc(Reg, Operands), 3706 "source operand must be either a VGPR or an inline constant"); 3707 return false; 3708 } 3709 3710 return true; 3711 } 3712 3713 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3714 const OperandVector &Operands) { 3715 const unsigned Opc = Inst.getOpcode(); 3716 const MCInstrDesc &Desc = MII.get(Opc); 3717 3718 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3719 return true; 3720 3721 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3722 if (Src2Idx == -1) 3723 return true; 3724 3725 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3726 if (!Src2.isReg()) 3727 return true; 3728 3729 MCRegister Src2Reg = Src2.getReg(); 3730 MCRegister DstReg = Inst.getOperand(0).getReg(); 3731 if (Src2Reg == DstReg) 3732 return 
true;
3733
3734 const MCRegisterInfo *TRI = getContext().getRegisterInfo();
3735 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128)
3736 return true;
3737
3738 if (TRI->regsOverlap(Src2Reg, DstReg)) {
3739 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands),
3740 "source 2 operand must not partially overlap with dst");
3741 return false;
3742 }
3743
3744 return true;
3745 }
3746
3747 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
3748 switch (Inst.getOpcode()) {
3749 default:
3750 return true;
3751 case V_DIV_SCALE_F32_gfx6_gfx7:
3752 case V_DIV_SCALE_F32_vi:
3753 case V_DIV_SCALE_F32_gfx10:
3754 case V_DIV_SCALE_F64_gfx6_gfx7:
3755 case V_DIV_SCALE_F64_vi:
3756 case V_DIV_SCALE_F64_gfx10:
3757 break;
3758 }
3759
3760 // TODO: Check that src0 = src1 or src2.
3761
3762 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3763 AMDGPU::OpName::src1_modifiers,
3764 AMDGPU::OpName::src2_modifiers}) {
3765 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3766 .getImm() &
3767 SISrcMods::ABS) {
3768 return false;
3769 }
3770 }
3771
3772 return true;
3773 }
3774
3775 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3776
3777 const unsigned Opc = Inst.getOpcode();
3778 const MCInstrDesc &Desc = MII.get(Opc);
3779
3780 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3781 return true;
3782
3783 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3784 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3785 if (isCI() || isSI())
3786 return false;
3787 }
3788
3789 return true;
3790 }
3791
3792 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3793 const unsigned Opc = Inst.getOpcode();
3794 const MCInstrDesc &Desc = MII.get(Opc);
3795
3796 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3797 return true;
3798
3799 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3800 if (DimIdx < 0)
3801 return true;
3802
3803 long Imm = Inst.getOperand(DimIdx).getImm();
3804 if (Imm < 0 || Imm >= 8)
3805 return false;
3806
3807 return true;
3808 }
3809
3810 static bool IsRevOpcode(const unsigned Opcode)
3811 {
3812 switch (Opcode) {
3813 case AMDGPU::V_SUBREV_F32_e32:
3814 case AMDGPU::V_SUBREV_F32_e64:
3815 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3816 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3817 case AMDGPU::V_SUBREV_F32_e32_vi:
3818 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3819 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3820 case AMDGPU::V_SUBREV_F32_e64_vi:
3821
3822 case AMDGPU::V_SUBREV_CO_U32_e32:
3823 case AMDGPU::V_SUBREV_CO_U32_e64:
3824 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3825 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3826
3827 case AMDGPU::V_SUBBREV_U32_e32:
3828 case AMDGPU::V_SUBBREV_U32_e64:
3829 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3830 case AMDGPU::V_SUBBREV_U32_e32_vi:
3831 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3832 case AMDGPU::V_SUBBREV_U32_e64_vi:
3833
3834 case AMDGPU::V_SUBREV_U32_e32:
3835 case AMDGPU::V_SUBREV_U32_e64:
3836 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3837 case AMDGPU::V_SUBREV_U32_e32_vi:
3838 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3839 case AMDGPU::V_SUBREV_U32_e64_vi:
3840
3841 case AMDGPU::V_SUBREV_F16_e32:
3842 case AMDGPU::V_SUBREV_F16_e64:
3843 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3844 case AMDGPU::V_SUBREV_F16_e32_vi:
3845 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3846 case AMDGPU::V_SUBREV_F16_e64_vi:
3847
3848 case AMDGPU::V_SUBREV_U16_e32:
3849 case AMDGPU::V_SUBREV_U16_e64:
3850 case AMDGPU::V_SUBREV_U16_e32_vi:
3851 case AMDGPU::V_SUBREV_U16_e64_vi:
3852
3853 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3854 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3855 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3856 3857 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3858 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3859 3860 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3861 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3862 3863 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3864 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3865 3866 case AMDGPU::V_LSHRREV_B32_e32: 3867 case AMDGPU::V_LSHRREV_B32_e64: 3868 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3869 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3870 case AMDGPU::V_LSHRREV_B32_e32_vi: 3871 case AMDGPU::V_LSHRREV_B32_e64_vi: 3872 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3873 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3874 3875 case AMDGPU::V_ASHRREV_I32_e32: 3876 case AMDGPU::V_ASHRREV_I32_e64: 3877 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3878 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3879 case AMDGPU::V_ASHRREV_I32_e32_vi: 3880 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3881 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3882 case AMDGPU::V_ASHRREV_I32_e64_vi: 3883 3884 case AMDGPU::V_LSHLREV_B32_e32: 3885 case AMDGPU::V_LSHLREV_B32_e64: 3886 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3887 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3888 case AMDGPU::V_LSHLREV_B32_e32_vi: 3889 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3890 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3891 case AMDGPU::V_LSHLREV_B32_e64_vi: 3892 3893 case AMDGPU::V_LSHLREV_B16_e32: 3894 case AMDGPU::V_LSHLREV_B16_e64: 3895 case AMDGPU::V_LSHLREV_B16_e32_vi: 3896 case AMDGPU::V_LSHLREV_B16_e64_vi: 3897 case AMDGPU::V_LSHLREV_B16_gfx10: 3898 3899 case AMDGPU::V_LSHRREV_B16_e32: 3900 case AMDGPU::V_LSHRREV_B16_e64: 3901 case AMDGPU::V_LSHRREV_B16_e32_vi: 3902 case AMDGPU::V_LSHRREV_B16_e64_vi: 3903 case AMDGPU::V_LSHRREV_B16_gfx10: 3904 3905 case AMDGPU::V_ASHRREV_I16_e32: 3906 case AMDGPU::V_ASHRREV_I16_e64: 3907 case AMDGPU::V_ASHRREV_I16_e32_vi: 3908 case AMDGPU::V_ASHRREV_I16_e64_vi: 3909 case AMDGPU::V_ASHRREV_I16_gfx10: 3910 3911 case AMDGPU::V_LSHLREV_B64_e64: 3912 case AMDGPU::V_LSHLREV_B64_gfx10: 3913 case AMDGPU::V_LSHLREV_B64_vi: 3914 3915 case AMDGPU::V_LSHRREV_B64_e64: 3916 case AMDGPU::V_LSHRREV_B64_gfx10: 3917 case AMDGPU::V_LSHRREV_B64_vi: 3918 3919 case AMDGPU::V_ASHRREV_I64_e64: 3920 case AMDGPU::V_ASHRREV_I64_gfx10: 3921 case AMDGPU::V_ASHRREV_I64_vi: 3922 3923 case AMDGPU::V_PK_LSHLREV_B16: 3924 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3925 case AMDGPU::V_PK_LSHLREV_B16_vi: 3926 3927 case AMDGPU::V_PK_LSHRREV_B16: 3928 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3929 case AMDGPU::V_PK_LSHRREV_B16_vi: 3930 case AMDGPU::V_PK_ASHRREV_I16: 3931 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3932 case AMDGPU::V_PK_ASHRREV_I16_vi: 3933 return true; 3934 default: 3935 return false; 3936 } 3937 } 3938 3939 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3940 3941 using namespace SIInstrFlags; 3942 const unsigned Opcode = Inst.getOpcode(); 3943 const MCInstrDesc &Desc = MII.get(Opcode); 3944 3945 // lds_direct register is defined so that it can be used 3946 // with 9-bit operands only. Ignore encodings which do not accept these. 
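// For example, lds_direct read as src0 of a plain VOP1/VOP2/VOP3 instruction
// passes the checks below, while lds_direct used as src1/src2, in an SDWA
// encoding, or with a *rev opcode is rejected, and gfx90a rejects it entirely.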
3947 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3948 if ((Desc.TSFlags & Enc) == 0) 3949 return None; 3950 3951 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3952 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3953 if (SrcIdx == -1) 3954 break; 3955 const auto &Src = Inst.getOperand(SrcIdx); 3956 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3957 3958 if (isGFX90A()) 3959 return StringRef("lds_direct is not supported on this GPU"); 3960 3961 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3962 return StringRef("lds_direct cannot be used with this instruction"); 3963 3964 if (SrcName != OpName::src0) 3965 return StringRef("lds_direct may be used as src0 only"); 3966 } 3967 } 3968 3969 return None; 3970 } 3971 3972 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3973 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3975 if (Op.isFlatOffset()) 3976 return Op.getStartLoc(); 3977 } 3978 return getLoc(); 3979 } 3980 3981 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3982 const OperandVector &Operands) { 3983 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3984 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3985 return true; 3986 3987 auto Opcode = Inst.getOpcode(); 3988 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3989 assert(OpNum != -1); 3990 3991 const auto &Op = Inst.getOperand(OpNum); 3992 if (!hasFlatOffsets() && Op.getImm() != 0) { 3993 Error(getFlatOffsetLoc(Operands), 3994 "flat offset modifier is not supported on this GPU"); 3995 return false; 3996 } 3997 3998 // For FLAT segment the offset must be positive; 3999 // MSB is ignored and forced to zero. 4000 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4001 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4002 if (!isIntN(OffsetSize, Op.getImm())) { 4003 Error(getFlatOffsetLoc(Operands), 4004 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4005 return false; 4006 } 4007 } else { 4008 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4009 if (!isUIntN(OffsetSize, Op.getImm())) { 4010 Error(getFlatOffsetLoc(Operands), 4011 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4012 return false; 4013 } 4014 } 4015 4016 return true; 4017 } 4018 4019 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4020 // Start with second operand because SMEM Offset cannot be dst or src0. 
4021 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4022 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4023 if (Op.isSMEMOffset()) 4024 return Op.getStartLoc(); 4025 } 4026 return getLoc(); 4027 } 4028 4029 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4030 const OperandVector &Operands) { 4031 if (isCI() || isSI()) 4032 return true; 4033 4034 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4035 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4036 return true; 4037 4038 auto Opcode = Inst.getOpcode(); 4039 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4040 if (OpNum == -1) 4041 return true; 4042 4043 const auto &Op = Inst.getOperand(OpNum); 4044 if (!Op.isImm()) 4045 return true; 4046 4047 uint64_t Offset = Op.getImm(); 4048 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4049 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4050 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4051 return true; 4052 4053 Error(getSMEMOffsetLoc(Operands), 4054 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4055 "expected a 21-bit signed offset"); 4056 4057 return false; 4058 } 4059 4060 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4061 unsigned Opcode = Inst.getOpcode(); 4062 const MCInstrDesc &Desc = MII.get(Opcode); 4063 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4064 return true; 4065 4066 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4067 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4068 4069 const int OpIndices[] = { Src0Idx, Src1Idx }; 4070 4071 unsigned NumExprs = 0; 4072 unsigned NumLiterals = 0; 4073 uint32_t LiteralValue; 4074 4075 for (int OpIdx : OpIndices) { 4076 if (OpIdx == -1) break; 4077 4078 const MCOperand &MO = Inst.getOperand(OpIdx); 4079 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4080 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4081 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4082 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4083 if (NumLiterals == 0 || LiteralValue != Value) { 4084 LiteralValue = Value; 4085 ++NumLiterals; 4086 } 4087 } else if (MO.isExpr()) { 4088 ++NumExprs; 4089 } 4090 } 4091 } 4092 4093 return NumLiterals + NumExprs <= 1; 4094 } 4095 4096 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4097 const unsigned Opc = Inst.getOpcode(); 4098 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4099 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4100 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4101 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4102 4103 if (OpSel & ~3) 4104 return false; 4105 } 4106 4107 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4108 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4109 if (OpSelIdx != -1) { 4110 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4111 return false; 4112 } 4113 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4114 if (OpSelHiIdx != -1) { 4115 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4116 return false; 4117 } 4118 } 4119 4120 return true; 4121 } 4122 4123 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4124 const OperandVector &Operands) { 4125 const unsigned Opc = Inst.getOpcode(); 4126 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4127 if (DppCtrlIdx < 0) 4128 return true; 4129 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4130 4131 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4132 // DPP64 is supported for row_newbcast only. 4133 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4134 if (Src0Idx >= 0 && 4135 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4136 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4137 Error(S, "64 bit dpp only supports row_newbcast"); 4138 return false; 4139 } 4140 } 4141 4142 return true; 4143 } 4144 4145 // Check if VCC register matches wavefront size 4146 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4147 auto FB = getFeatureBits(); 4148 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4149 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4150 } 4151 4152 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4153 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4154 const OperandVector &Operands) { 4155 unsigned Opcode = Inst.getOpcode(); 4156 const MCInstrDesc &Desc = MII.get(Opcode); 4157 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4158 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4159 ImmIdx == -1) 4160 return true; 4161 4162 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4163 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4164 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4165 4166 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4167 4168 unsigned NumExprs = 0; 4169 unsigned NumLiterals = 0; 4170 uint32_t LiteralValue; 4171 4172 for (int OpIdx : OpIndices) { 4173 if (OpIdx == -1) 4174 continue; 4175 4176 const MCOperand &MO = Inst.getOperand(OpIdx); 4177 if (!MO.isImm() && !MO.isExpr()) 4178 continue; 4179 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4180 continue; 4181 4182 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4183 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4184 Error(getConstLoc(Operands), 4185 "inline constants are not allowed for this operand"); 4186 return false; 4187 } 4188 4189 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4190 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4191 if (NumLiterals == 0 || LiteralValue != Value) { 4192 LiteralValue = Value; 4193 ++NumLiterals; 4194 } 4195 } else if (MO.isExpr()) { 4196 ++NumExprs; 4197 } 4198 } 4199 NumLiterals += NumExprs; 4200 4201 if (!NumLiterals) 4202 return true; 4203 4204 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4205 Error(getLitLoc(Operands), "literal operands are not supported"); 4206 return false; 4207 } 4208 4209 if (NumLiterals > 1) { 4210 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4211 return false; 4212 } 4213 4214 return true; 4215 } 4216 4217 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4218 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4219 const MCRegisterInfo *MRI) { 4220 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4221 if (OpIdx < 0) 4222 return -1; 4223 4224 const MCOperand &Op = Inst.getOperand(OpIdx); 4225 if (!Op.isReg()) 4226 return -1; 4227 4228 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4229 auto Reg = Sub ? Sub : Op.getReg(); 4230 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4231 return AGPR32.contains(Reg) ? 
1 : 0; 4232 } 4233 4234 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4235 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4236 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4237 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4238 SIInstrFlags::DS)) == 0) 4239 return true; 4240 4241 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4242 : AMDGPU::OpName::vdata; 4243 4244 const MCRegisterInfo *MRI = getMRI(); 4245 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4246 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4247 4248 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4249 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4250 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4251 return false; 4252 } 4253 4254 auto FB = getFeatureBits(); 4255 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4256 if (DataAreg < 0 || DstAreg < 0) 4257 return true; 4258 return DstAreg == DataAreg; 4259 } 4260 4261 return DstAreg < 1 && DataAreg < 1; 4262 } 4263 4264 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4265 auto FB = getFeatureBits(); 4266 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4267 return true; 4268 4269 const MCRegisterInfo *MRI = getMRI(); 4270 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4271 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4272 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4273 const MCOperand &Op = Inst.getOperand(I); 4274 if (!Op.isReg()) 4275 continue; 4276 4277 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4278 if (!Sub) 4279 continue; 4280 4281 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4282 return false; 4283 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4284 return false; 4285 } 4286 4287 return true; 4288 } 4289 4290 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4291 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4293 if (Op.isBLGP()) 4294 return Op.getStartLoc(); 4295 } 4296 return SMLoc(); 4297 } 4298 4299 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4300 const OperandVector &Operands) { 4301 unsigned Opc = Inst.getOpcode(); 4302 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4303 if (BlgpIdx == -1) 4304 return true; 4305 SMLoc BLGPLoc = getBLGPLoc(Operands); 4306 if (!BLGPLoc.isValid()) 4307 return true; 4308 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4309 auto FB = getFeatureBits(); 4310 bool UsesNeg = false; 4311 if (FB[AMDGPU::FeatureGFX940Insts]) { 4312 switch (Opc) { 4313 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4314 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4315 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4316 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4317 UsesNeg = true; 4318 } 4319 } 4320 4321 if (IsNeg == UsesNeg) 4322 return true; 4323 4324 Error(BLGPLoc, 4325 UsesNeg ? "invalid modifier: blgp is not supported" 4326 : "invalid modifier: neg is not supported"); 4327 4328 return false; 4329 } 4330 4331 // gfx90a has an undocumented limitation: 4332 // DS_GWS opcodes must use even aligned registers. 
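// For example, a ds_gws_init whose data operand lives in an odd-numbered
// register such as v1 is rejected on gfx90a with "vgpr must be even aligned";
// v0, v2, ... are accepted.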
4333 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4334 const OperandVector &Operands) { 4335 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4336 return true; 4337 4338 int Opc = Inst.getOpcode(); 4339 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4340 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4341 return true; 4342 4343 const MCRegisterInfo *MRI = getMRI(); 4344 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4345 int Data0Pos = 4346 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4347 assert(Data0Pos != -1); 4348 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4349 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4350 if (RegIdx & 1) { 4351 SMLoc RegLoc = getRegLoc(Reg, Operands); 4352 Error(RegLoc, "vgpr must be even aligned"); 4353 return false; 4354 } 4355 4356 return true; 4357 } 4358 4359 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4360 const OperandVector &Operands, 4361 const SMLoc &IDLoc) { 4362 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4363 AMDGPU::OpName::cpol); 4364 if (CPolPos == -1) 4365 return true; 4366 4367 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4368 4369 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4370 if ((TSFlags & (SIInstrFlags::SMRD)) && 4371 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4372 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4373 return false; 4374 } 4375 4376 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4377 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4378 StringRef CStr(S.getPointer()); 4379 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4380 Error(S, "scc is not supported on this GPU"); 4381 return false; 4382 } 4383 4384 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4385 return true; 4386 4387 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4388 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4389 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4390 : "instruction must use glc"); 4391 return false; 4392 } 4393 } else { 4394 if (CPol & CPol::GLC) { 4395 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4396 StringRef CStr(S.getPointer()); 4397 S = SMLoc::getFromPointer( 4398 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4399 Error(S, isGFX940() ? 
"instruction must not use sc0" 4400 : "instruction must not use glc"); 4401 return false; 4402 } 4403 } 4404 4405 return true; 4406 } 4407 4408 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4409 const SMLoc &IDLoc, 4410 const OperandVector &Operands) { 4411 if (auto ErrMsg = validateLdsDirect(Inst)) { 4412 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4413 return false; 4414 } 4415 if (!validateSOPLiteral(Inst)) { 4416 Error(getLitLoc(Operands), 4417 "only one literal operand is allowed"); 4418 return false; 4419 } 4420 if (!validateVOPLiteral(Inst, Operands)) { 4421 return false; 4422 } 4423 if (!validateConstantBusLimitations(Inst, Operands)) { 4424 return false; 4425 } 4426 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4427 return false; 4428 } 4429 if (!validateIntClampSupported(Inst)) { 4430 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4431 "integer clamping is not supported on this GPU"); 4432 return false; 4433 } 4434 if (!validateOpSel(Inst)) { 4435 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4436 "invalid op_sel operand"); 4437 return false; 4438 } 4439 if (!validateDPP(Inst, Operands)) { 4440 return false; 4441 } 4442 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4443 if (!validateMIMGD16(Inst)) { 4444 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4445 "d16 modifier is not supported on this GPU"); 4446 return false; 4447 } 4448 if (!validateMIMGDim(Inst)) { 4449 Error(IDLoc, "dim modifier is required on this GPU"); 4450 return false; 4451 } 4452 if (!validateMIMGMSAA(Inst)) { 4453 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4454 "invalid dim; must be MSAA type"); 4455 return false; 4456 } 4457 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4458 Error(IDLoc, *ErrMsg); 4459 return false; 4460 } 4461 if (!validateMIMGAddrSize(Inst)) { 4462 Error(IDLoc, 4463 "image address size does not match dim and a16"); 4464 return false; 4465 } 4466 if (!validateMIMGAtomicDMask(Inst)) { 4467 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4468 "invalid atomic image dmask"); 4469 return false; 4470 } 4471 if (!validateMIMGGatherDMask(Inst)) { 4472 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4473 "invalid image_gather dmask: only one bit must be set"); 4474 return false; 4475 } 4476 if (!validateMovrels(Inst, Operands)) { 4477 return false; 4478 } 4479 if (!validateFlatOffset(Inst, Operands)) { 4480 return false; 4481 } 4482 if (!validateSMEMOffset(Inst, Operands)) { 4483 return false; 4484 } 4485 if (!validateMAIAccWrite(Inst, Operands)) { 4486 return false; 4487 } 4488 if (!validateMFMA(Inst, Operands)) { 4489 return false; 4490 } 4491 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4492 return false; 4493 } 4494 4495 if (!validateAGPRLdSt(Inst)) { 4496 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4497 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4498 : "invalid register class: agpr loads and stores not supported on this GPU" 4499 ); 4500 return false; 4501 } 4502 if (!validateVGPRAlign(Inst)) { 4503 Error(IDLoc, 4504 "invalid register class: vgpr tuples must be 64 bit aligned"); 4505 return false; 4506 } 4507 if (!validateGWS(Inst, Operands)) { 4508 return false; 4509 } 4510 4511 if (!validateBLGP(Inst, Operands)) { 4512 return false; 4513 } 4514 4515 if (!validateDivScale(Inst)) { 4516 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4517 return false; 4518 } 4519 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4520 return false; 4521 } 4522 4523 return true; 4524 } 4525 4526 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4527 const FeatureBitset &FBS, 4528 unsigned VariantID = 0); 4529 4530 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4531 const FeatureBitset &AvailableFeatures, 4532 unsigned VariantID); 4533 4534 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4535 const FeatureBitset &FBS) { 4536 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4537 } 4538 4539 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4540 const FeatureBitset &FBS, 4541 ArrayRef<unsigned> Variants) { 4542 for (auto Variant : Variants) { 4543 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4544 return true; 4545 } 4546 4547 return false; 4548 } 4549 4550 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4551 const SMLoc &IDLoc) { 4552 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4553 4554 // Check if requested instruction variant is supported. 4555 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4556 return false; 4557 4558 // This instruction is not supported. 4559 // Clear any other pending errors because they are no longer relevant. 4560 getParser().clearPendingErrors(); 4561 4562 // Requested instruction variant is not supported. 4563 // Check if any other variants are supported. 4564 StringRef VariantName = getMatchedVariantName(); 4565 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4566 return Error(IDLoc, 4567 Twine(VariantName, 4568 " variant of this instruction is not supported")); 4569 } 4570 4571 // Finally check if this instruction is supported on any other GPU. 4572 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4573 return Error(IDLoc, "instruction not supported on this GPU"); 4574 } 4575 4576 // Instruction not supported on any GPU. Probably a typo. 4577 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4578 return Error(IDLoc, "invalid instruction" + Suggestion); 4579 } 4580 4581 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4582 OperandVector &Operands, 4583 MCStreamer &Out, 4584 uint64_t &ErrorInfo, 4585 bool MatchingInlineAsm) { 4586 MCInst Inst; 4587 unsigned Result = Match_Success; 4588 for (auto Variant : getMatchedVariants()) { 4589 uint64_t EI; 4590 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4591 Variant); 4592 // We order match statuses from least to most specific. 
We use most specific 4593 // status as resulting 4594 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4595 if ((R == Match_Success) || 4596 (R == Match_PreferE32) || 4597 (R == Match_MissingFeature && Result != Match_PreferE32) || 4598 (R == Match_InvalidOperand && Result != Match_MissingFeature 4599 && Result != Match_PreferE32) || 4600 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4601 && Result != Match_MissingFeature 4602 && Result != Match_PreferE32)) { 4603 Result = R; 4604 ErrorInfo = EI; 4605 } 4606 if (R == Match_Success) 4607 break; 4608 } 4609 4610 if (Result == Match_Success) { 4611 if (!validateInstruction(Inst, IDLoc, Operands)) { 4612 return true; 4613 } 4614 Inst.setLoc(IDLoc); 4615 Out.emitInstruction(Inst, getSTI()); 4616 return false; 4617 } 4618 4619 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4620 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4621 return true; 4622 } 4623 4624 switch (Result) { 4625 default: break; 4626 case Match_MissingFeature: 4627 // It has been verified that the specified instruction 4628 // mnemonic is valid. A match was found but it requires 4629 // features which are not supported on this GPU. 4630 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4631 4632 case Match_InvalidOperand: { 4633 SMLoc ErrorLoc = IDLoc; 4634 if (ErrorInfo != ~0ULL) { 4635 if (ErrorInfo >= Operands.size()) { 4636 return Error(IDLoc, "too few operands for instruction"); 4637 } 4638 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4639 if (ErrorLoc == SMLoc()) 4640 ErrorLoc = IDLoc; 4641 } 4642 return Error(ErrorLoc, "invalid operand for instruction"); 4643 } 4644 4645 case Match_PreferE32: 4646 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4647 "should be encoded as e32"); 4648 case Match_MnemonicFail: 4649 llvm_unreachable("Invalid instructions should have been handled already"); 4650 } 4651 llvm_unreachable("Implement any new match types added!"); 4652 } 4653 4654 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4655 int64_t Tmp = -1; 4656 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4657 return true; 4658 } 4659 if (getParser().parseAbsoluteExpression(Tmp)) { 4660 return true; 4661 } 4662 Ret = static_cast<uint32_t>(Tmp); 4663 return false; 4664 } 4665 4666 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4667 uint32_t &Minor) { 4668 if (ParseAsAbsoluteExpression(Major)) 4669 return TokError("invalid major version"); 4670 4671 if (!trySkipToken(AsmToken::Comma)) 4672 return TokError("minor version number required, comma expected"); 4673 4674 if (ParseAsAbsoluteExpression(Minor)) 4675 return TokError("invalid minor version"); 4676 4677 return false; 4678 } 4679 4680 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4681 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4682 return TokError("directive only supported for amdgcn architecture"); 4683 4684 std::string TargetIDDirective; 4685 SMLoc TargetStart = getTok().getLoc(); 4686 if (getParser().parseEscapedString(TargetIDDirective)) 4687 return true; 4688 4689 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4690 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4691 return getParser().Error(TargetRange.Start, 4692 (Twine(".amdgcn_target directive's target id ") + 4693 Twine(TargetIDDirective) + 4694 Twine(" does not match the specified target id ") + 4695 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4696 4697 return false; 4698 } 4699 4700 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4701 return Error(Range.Start, "value out of range", Range); 4702 } 4703 4704 bool AMDGPUAsmParser::calculateGPRBlocks( 4705 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4706 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4707 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4708 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4709 // TODO(scott.linder): These calculations are duplicated from 4710 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4711 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4712 4713 unsigned NumVGPRs = NextFreeVGPR; 4714 unsigned NumSGPRs = NextFreeSGPR; 4715 4716 if (Version.Major >= 10) 4717 NumSGPRs = 0; 4718 else { 4719 unsigned MaxAddressableNumSGPRs = 4720 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4721 4722 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4723 NumSGPRs > MaxAddressableNumSGPRs) 4724 return OutOfRangeError(SGPRRange); 4725 4726 NumSGPRs += 4727 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4728 4729 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4730 NumSGPRs > MaxAddressableNumSGPRs) 4731 return OutOfRangeError(SGPRRange); 4732 4733 if (Features.test(FeatureSGPRInitBug)) 4734 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4735 } 4736 4737 VGPRBlocks = 4738 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4739 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4740 4741 return false; 4742 } 4743 4744 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4745 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4746 return TokError("directive only supported for amdgcn architecture"); 4747 4748 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4749 return TokError("directive only supported for amdhsa OS"); 4750 4751 StringRef KernelName; 4752 if (getParser().parseIdentifier(KernelName)) 4753 return true; 4754 4755 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4756 4757 StringSet<> Seen; 4758 4759 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4760 4761 SMRange VGPRRange; 4762 uint64_t NextFreeVGPR = 0; 4763 uint64_t AccumOffset = 0; 4764 uint64_t SharedVGPRCount = 0; 4765 SMRange SGPRRange; 4766 uint64_t NextFreeSGPR = 0; 4767 4768 // Count the number of user SGPRs implied from the enabled feature bits. 4769 unsigned ImpliedUserSGPRCount = 0; 4770 4771 // Track if the asm explicitly contains the directive for the user SGPR 4772 // count. 
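// For example, .amdhsa_user_sgpr_dispatch_ptr implies 2 user SGPRs; if an
// explicit .amdhsa_user_sgpr_count is smaller than the implied total, an
// error is reported once all directives have been parsed.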
4773 Optional<unsigned> ExplicitUserSGPRCount; 4774 bool ReserveVCC = true; 4775 bool ReserveFlatScr = true; 4776 Optional<bool> EnableWavefrontSize32; 4777 4778 while (true) { 4779 while (trySkipToken(AsmToken::EndOfStatement)); 4780 4781 StringRef ID; 4782 SMRange IDRange = getTok().getLocRange(); 4783 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4784 return true; 4785 4786 if (ID == ".end_amdhsa_kernel") 4787 break; 4788 4789 if (Seen.find(ID) != Seen.end()) 4790 return TokError(".amdhsa_ directives cannot be repeated"); 4791 Seen.insert(ID); 4792 4793 SMLoc ValStart = getLoc(); 4794 int64_t IVal; 4795 if (getParser().parseAbsoluteExpression(IVal)) 4796 return true; 4797 SMLoc ValEnd = getLoc(); 4798 SMRange ValRange = SMRange(ValStart, ValEnd); 4799 4800 if (IVal < 0) 4801 return OutOfRangeError(ValRange); 4802 4803 uint64_t Val = IVal; 4804 4805 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4806 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4807 return OutOfRangeError(RANGE); \ 4808 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4809 4810 if (ID == ".amdhsa_group_segment_fixed_size") { 4811 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4812 return OutOfRangeError(ValRange); 4813 KD.group_segment_fixed_size = Val; 4814 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4815 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4816 return OutOfRangeError(ValRange); 4817 KD.private_segment_fixed_size = Val; 4818 } else if (ID == ".amdhsa_kernarg_size") { 4819 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4820 return OutOfRangeError(ValRange); 4821 KD.kernarg_size = Val; 4822 } else if (ID == ".amdhsa_user_sgpr_count") { 4823 ExplicitUserSGPRCount = Val; 4824 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4825 if (hasArchitectedFlatScratch()) 4826 return Error(IDRange.Start, 4827 "directive is not supported with architected flat scratch", 4828 IDRange); 4829 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4830 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4831 Val, ValRange); 4832 if (Val) 4833 ImpliedUserSGPRCount += 4; 4834 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4835 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4836 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4837 ValRange); 4838 if (Val) 4839 ImpliedUserSGPRCount += 2; 4840 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4841 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4842 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4843 ValRange); 4844 if (Val) 4845 ImpliedUserSGPRCount += 2; 4846 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4847 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4848 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4849 Val, ValRange); 4850 if (Val) 4851 ImpliedUserSGPRCount += 2; 4852 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4853 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4854 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4855 ValRange); 4856 if (Val) 4857 ImpliedUserSGPRCount += 2; 4858 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4859 if (hasArchitectedFlatScratch()) 4860 return Error(IDRange.Start, 4861 "directive is not supported with architected flat scratch", 4862 IDRange); 4863 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4864 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4865 ValRange); 4866 if (Val) 4867 ImpliedUserSGPRCount += 2; 4868 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4869 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4870 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4871 Val, ValRange); 4872 if (Val) 4873 ImpliedUserSGPRCount += 1; 4874 } else if (ID == ".amdhsa_wavefront_size32") { 4875 if (IVersion.Major < 10) 4876 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4877 EnableWavefrontSize32 = Val; 4878 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4879 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4880 Val, ValRange); 4881 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4882 if (hasArchitectedFlatScratch()) 4883 return Error(IDRange.Start, 4884 "directive is not supported with architected flat scratch", 4885 IDRange); 4886 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4887 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4888 } else if (ID == ".amdhsa_enable_private_segment") { 4889 if (!hasArchitectedFlatScratch()) 4890 return Error( 4891 IDRange.Start, 4892 "directive is not supported without architected flat scratch", 4893 IDRange); 4894 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4895 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4896 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4897 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4898 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4899 ValRange); 4900 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4901 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4902 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4903 ValRange); 4904 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4905 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4906 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4907 ValRange); 4908 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4909 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4910 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4911 ValRange); 4912 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4913 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4914 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4915 ValRange); 4916 } else if (ID == ".amdhsa_next_free_vgpr") { 4917 VGPRRange = ValRange; 4918 NextFreeVGPR = Val; 4919 } else if (ID == ".amdhsa_next_free_sgpr") { 4920 SGPRRange = ValRange; 4921 NextFreeSGPR = Val; 4922 } else if (ID == ".amdhsa_accum_offset") { 4923 if (!isGFX90A()) 4924 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4925 AccumOffset = Val; 4926 } else if (ID == ".amdhsa_reserve_vcc") { 4927 if (!isUInt<1>(Val)) 4928 return OutOfRangeError(ValRange); 4929 ReserveVCC = Val; 4930 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4931 if (IVersion.Major < 7) 4932 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4933 if (hasArchitectedFlatScratch()) 4934 return Error(IDRange.Start, 4935 "directive is not supported with architected flat scratch", 4936 IDRange); 4937 if (!isUInt<1>(Val)) 4938 return OutOfRangeError(ValRange); 4939 ReserveFlatScr = Val; 4940 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4941 if (IVersion.Major < 8) 4942 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4943 if (!isUInt<1>(Val)) 4944 return OutOfRangeError(ValRange); 4945 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4946 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4947 IDRange); 4948 } else if (ID == ".amdhsa_float_round_mode_32") { 4949 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4950 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4951 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4952 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4953 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4954 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4955 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4956 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4957 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4959 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4960 ValRange); 4961 } else if (ID == ".amdhsa_dx10_clamp") { 4962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4963 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4964 } else if (ID == ".amdhsa_ieee_mode") { 4965 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4966 Val, ValRange); 4967 } else if (ID == ".amdhsa_fp16_overflow") { 4968 if (IVersion.Major < 9) 4969 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4970 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4971 ValRange); 4972 } else if (ID == ".amdhsa_tg_split") { 4973 if (!isGFX90A()) 4974 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4975 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4976 ValRange); 4977 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4978 if (IVersion.Major < 10) 4979 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4980 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4981 ValRange); 4982 } else if (ID == ".amdhsa_memory_ordered") { 4983 if (IVersion.Major < 10) 4984 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4985 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4986 ValRange); 4987 } else if (ID == ".amdhsa_forward_progress") { 4988 if (IVersion.Major < 10) 4989 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4990 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4991 ValRange); 4992 } else if (ID == ".amdhsa_shared_vgpr_count") { 4993 if (IVersion.Major < 10) 4994 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4995 SharedVGPRCount = Val; 4996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4997 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4998 ValRange); 4999 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5000 PARSE_BITS_ENTRY( 5001 KD.compute_pgm_rsrc2, 5002 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5003 ValRange); 5004 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5005 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5006 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5007 Val, ValRange); 5008 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5009 PARSE_BITS_ENTRY( 5010 KD.compute_pgm_rsrc2, 5011 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5012 ValRange); 5013 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5015 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5016 Val, ValRange); 5017 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5019 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5020 Val, ValRange); 5021 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5023 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5024 Val, ValRange); 5025 } else if (ID == ".amdhsa_exception_int_div_zero") { 5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5027 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
5028 Val, ValRange);
5029 } else {
5030 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
5031 }
5032
5033 #undef PARSE_BITS_ENTRY
5034 }
5035
5036 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
5037 return TokError(".amdhsa_next_free_vgpr directive is required");
5038
5039 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
5040 return TokError(".amdhsa_next_free_sgpr directive is required");
5041
5042 unsigned VGPRBlocks;
5043 unsigned SGPRBlocks;
5044 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
5045 getTargetStreamer().getTargetID()->isXnackOnOrAny(),
5046 EnableWavefrontSize32, NextFreeVGPR,
5047 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
5048 SGPRBlocks))
5049 return true;
5050
5051 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
5052 VGPRBlocks))
5053 return OutOfRangeError(VGPRRange);
5054 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5055 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
5056
5057 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
5058 SGPRBlocks))
5059 return OutOfRangeError(SGPRRange);
5060 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
5061 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
5062 SGPRBlocks);
5063
5064 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount)
5065 return TokError("amdhsa_user_sgpr_count smaller than implied by "
5066 "enabled user SGPRs");
5067
5068 unsigned UserSGPRCount =
5069 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount;
5070
5071 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
5072 return TokError("too many user SGPRs enabled");
5073 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
5074 UserSGPRCount);
5075
5076 if (isGFX90A()) {
5077 if (Seen.find(".amdhsa_accum_offset") == Seen.end())
5078 return TokError(".amdhsa_accum_offset directive is required");
5079 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
5080 return TokError("accum_offset should be in range [4..256] in "
5081 "increments of 4");
5082 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
5083 return TokError("accum_offset exceeds total VGPR allocation");
5084 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
5085 (AccumOffset / 4 - 1));
5086 }
5087
5088 if (IVersion.Major == 10) {
5089 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY
5090 if (SharedVGPRCount && EnableWavefrontSize32) {
5091 return TokError("shared_vgpr_count directive not valid on "
5092 "wavefront size 32");
5093 }
5094 if (SharedVGPRCount * 2 + VGPRBlocks > 63) {
5095 return TokError("shared_vgpr_count*2 + "
5096 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot "
5097 "exceed 63");
5098 }
5099 }
5100
5101 getTargetStreamer().EmitAmdhsaKernelDescriptor(
5102 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
5103 ReserveFlatScr);
5104 return false;
5105 }
5106
5107 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
5108 uint32_t Major;
5109 uint32_t Minor;
5110
5111 if (ParseDirectiveMajorMinor(Major, Minor))
5112 return true;
5113
5114 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
5115 return false;
5116 }
5117
5118 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
5119 uint32_t Major;
5120 uint32_t Minor;
5121 uint32_t Stepping;
5122 StringRef VendorName;
5123 StringRef ArchName;
5124
5125 // If this directive has no
arguments, then use the ISA version for the 5126 // targeted GPU. 5127 if (isToken(AsmToken::EndOfStatement)) { 5128 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5129 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5130 ISA.Stepping, 5131 "AMD", "AMDGPU"); 5132 return false; 5133 } 5134 5135 if (ParseDirectiveMajorMinor(Major, Minor)) 5136 return true; 5137 5138 if (!trySkipToken(AsmToken::Comma)) 5139 return TokError("stepping version number required, comma expected"); 5140 5141 if (ParseAsAbsoluteExpression(Stepping)) 5142 return TokError("invalid stepping version"); 5143 5144 if (!trySkipToken(AsmToken::Comma)) 5145 return TokError("vendor name required, comma expected"); 5146 5147 if (!parseString(VendorName, "invalid vendor name")) 5148 return true; 5149 5150 if (!trySkipToken(AsmToken::Comma)) 5151 return TokError("arch name required, comma expected"); 5152 5153 if (!parseString(ArchName, "invalid arch name")) 5154 return true; 5155 5156 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5157 VendorName, ArchName); 5158 return false; 5159 } 5160 5161 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5162 amd_kernel_code_t &Header) { 5163 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5164 // assembly for backwards compatibility. 5165 if (ID == "max_scratch_backing_memory_byte_size") { 5166 Parser.eatToEndOfStatement(); 5167 return false; 5168 } 5169 5170 SmallString<40> ErrStr; 5171 raw_svector_ostream Err(ErrStr); 5172 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5173 return TokError(Err.str()); 5174 } 5175 Lex(); 5176 5177 if (ID == "enable_wavefront_size32") { 5178 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5179 if (!isGFX10Plus()) 5180 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5181 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5182 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5183 } else { 5184 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5185 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5186 } 5187 } 5188 5189 if (ID == "wavefront_size") { 5190 if (Header.wavefront_size == 5) { 5191 if (!isGFX10Plus()) 5192 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5193 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5194 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5195 } else if (Header.wavefront_size == 6) { 5196 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5197 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5198 } 5199 } 5200 5201 if (ID == "enable_wgp_mode") { 5202 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5203 !isGFX10Plus()) 5204 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5205 } 5206 5207 if (ID == "enable_mem_ordered") { 5208 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5209 !isGFX10Plus()) 5210 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5211 } 5212 5213 if (ID == "enable_fwd_progress") { 5214 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5215 !isGFX10Plus()) 5216 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5217 } 5218 5219 return false; 5220 } 5221 5222 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5223 amd_kernel_code_t Header; 5224 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5225 5226 while (true) { 
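// Each iteration typically consumes one "<field> = <value>" record
// (e.g. "wavefront_size = 6") until .end_amd_kernel_code_t ends the block.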
5227 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5228 // will set the current token to EndOfStatement. 5229 while(trySkipToken(AsmToken::EndOfStatement)); 5230 5231 StringRef ID; 5232 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5233 return true; 5234 5235 if (ID == ".end_amd_kernel_code_t") 5236 break; 5237 5238 if (ParseAMDKernelCodeTValue(ID, Header)) 5239 return true; 5240 } 5241 5242 getTargetStreamer().EmitAMDKernelCodeT(Header); 5243 5244 return false; 5245 } 5246 5247 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5248 StringRef KernelName; 5249 if (!parseId(KernelName, "expected symbol name")) 5250 return true; 5251 5252 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5253 ELF::STT_AMDGPU_HSA_KERNEL); 5254 5255 KernelScope.initialize(getContext()); 5256 return false; 5257 } 5258 5259 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5260 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5261 return Error(getLoc(), 5262 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5263 "architectures"); 5264 } 5265 5266 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5267 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5268 return Error(getParser().getTok().getLoc(), "target id must match options"); 5269 5270 getTargetStreamer().EmitISAVersion(); 5271 Lex(); 5272 5273 return false; 5274 } 5275 5276 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5277 const char *AssemblerDirectiveBegin; 5278 const char *AssemblerDirectiveEnd; 5279 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5280 isHsaAbiVersion3AndAbove(&getSTI()) 5281 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5282 HSAMD::V3::AssemblerDirectiveEnd) 5283 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5284 HSAMD::AssemblerDirectiveEnd); 5285 5286 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5287 return Error(getLoc(), 5288 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5289 "not available on non-amdhsa OSes")).str()); 5290 } 5291 5292 std::string HSAMetadataString; 5293 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5294 HSAMetadataString)) 5295 return true; 5296 5297 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5298 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5299 return Error(getLoc(), "invalid HSA metadata"); 5300 } else { 5301 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5302 return Error(getLoc(), "invalid HSA metadata"); 5303 } 5304 5305 return false; 5306 } 5307 5308 /// Common code to parse out a block of text (typically YAML) between start and 5309 /// end directives. 
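/// For example (illustrative), an HSA metadata block is delimited as:
///   .amdgpu_metadata
///     <YAML document>
///   .end_amdgpu_metadata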
5310 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5311 const char *AssemblerDirectiveEnd, 5312 std::string &CollectString) { 5313 5314 raw_string_ostream CollectStream(CollectString); 5315 5316 getLexer().setSkipSpace(false); 5317 5318 bool FoundEnd = false; 5319 while (!isToken(AsmToken::Eof)) { 5320 while (isToken(AsmToken::Space)) { 5321 CollectStream << getTokenStr(); 5322 Lex(); 5323 } 5324 5325 if (trySkipId(AssemblerDirectiveEnd)) { 5326 FoundEnd = true; 5327 break; 5328 } 5329 5330 CollectStream << Parser.parseStringToEndOfStatement() 5331 << getContext().getAsmInfo()->getSeparatorString(); 5332 5333 Parser.eatToEndOfStatement(); 5334 } 5335 5336 getLexer().setSkipSpace(true); 5337 5338 if (isToken(AsmToken::Eof) && !FoundEnd) { 5339 return TokError(Twine("expected directive ") + 5340 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5341 } 5342 5343 CollectStream.flush(); 5344 return false; 5345 } 5346 5347 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5348 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5349 std::string String; 5350 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5351 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5352 return true; 5353 5354 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5355 if (!PALMetadata->setFromString(String)) 5356 return Error(getLoc(), "invalid PAL metadata"); 5357 return false; 5358 } 5359 5360 /// Parse the assembler directive for old linear-format PAL metadata. 5361 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5362 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5363 return Error(getLoc(), 5364 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5365 "not available on non-amdpal OSes")).str()); 5366 } 5367 5368 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5369 PALMetadata->setLegacy(); 5370 for (;;) { 5371 uint32_t Key, Value; 5372 if (ParseAsAbsoluteExpression(Key)) { 5373 return TokError(Twine("invalid value in ") + 5374 Twine(PALMD::AssemblerDirective)); 5375 } 5376 if (!trySkipToken(AsmToken::Comma)) { 5377 return TokError(Twine("expected an even number of values in ") + 5378 Twine(PALMD::AssemblerDirective)); 5379 } 5380 if (ParseAsAbsoluteExpression(Value)) { 5381 return TokError(Twine("invalid value in ") + 5382 Twine(PALMD::AssemblerDirective)); 5383 } 5384 PALMetadata->setRegister(Key, Value); 5385 if (!trySkipToken(AsmToken::Comma)) 5386 break; 5387 } 5388 return false; 5389 } 5390 5391 /// ParseDirectiveAMDGPULDS 5392 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5393 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5394 if (getParser().checkForValidSection()) 5395 return true; 5396 5397 StringRef Name; 5398 SMLoc NameLoc = getLoc(); 5399 if (getParser().parseIdentifier(Name)) 5400 return TokError("expected identifier in directive"); 5401 5402 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5403 if (parseToken(AsmToken::Comma, "expected ','")) 5404 return true; 5405 5406 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5407 5408 int64_t Size; 5409 SMLoc SizeLoc = getLoc(); 5410 if (getParser().parseAbsoluteExpression(Size)) 5411 return true; 5412 if (Size < 0) 5413 return Error(SizeLoc, "size must be non-negative"); 5414 if (Size > LocalMemorySize) 5415 return Error(SizeLoc, "size is too large"); 5416 5417 int64_t Alignment = 4; 5418 if (trySkipToken(AsmToken::Comma)) { 5419 SMLoc AlignLoc = getLoc(); 5420 if 
(getParser().parseAbsoluteExpression(Alignment)) 5421 return true; 5422 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5423 return Error(AlignLoc, "alignment must be a power of two"); 5424 5425 // Alignment larger than the size of LDS is possible in theory, as long 5426 // as the linker manages to place the symbol at address 0, but we do want 5427 // to make sure the alignment fits nicely into a 32-bit integer. 5428 if (Alignment >= 1u << 31) 5429 return Error(AlignLoc, "alignment is too large"); 5430 } 5431 5432 if (parseToken(AsmToken::EndOfStatement, 5433 "unexpected token in '.amdgpu_lds' directive")) 5434 return true; 5435 5436 Symbol->redefineIfPossible(); 5437 if (!Symbol->isUndefined()) 5438 return Error(NameLoc, "invalid symbol redefinition"); 5439 5440 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5441 return false; 5442 } 5443 5444 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5445 StringRef IDVal = DirectiveID.getString(); 5446 5447 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5448 if (IDVal == ".amdhsa_kernel") 5449 return ParseDirectiveAMDHSAKernel(); 5450 5451 // TODO: Restructure/combine with PAL metadata directive. 5452 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5453 return ParseDirectiveHSAMetadata(); 5454 } else { 5455 if (IDVal == ".hsa_code_object_version") 5456 return ParseDirectiveHSACodeObjectVersion(); 5457 5458 if (IDVal == ".hsa_code_object_isa") 5459 return ParseDirectiveHSACodeObjectISA(); 5460 5461 if (IDVal == ".amd_kernel_code_t") 5462 return ParseDirectiveAMDKernelCodeT(); 5463 5464 if (IDVal == ".amdgpu_hsa_kernel") 5465 return ParseDirectiveAMDGPUHsaKernel(); 5466 5467 if (IDVal == ".amd_amdgpu_isa") 5468 return ParseDirectiveISAVersion(); 5469 5470 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5471 return ParseDirectiveHSAMetadata(); 5472 } 5473 5474 if (IDVal == ".amdgcn_target") 5475 return ParseDirectiveAMDGCNTarget(); 5476 5477 if (IDVal == ".amdgpu_lds") 5478 return ParseDirectiveAMDGPULDS(); 5479 5480 if (IDVal == PALMD::AssemblerDirectiveBegin) 5481 return ParseDirectivePALMetadataBegin(); 5482 5483 if (IDVal == PALMD::AssemblerDirective) 5484 return ParseDirectivePALMetadata(); 5485 5486 return true; 5487 } 5488 5489 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5490 unsigned RegNo) { 5491 5492 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5493 return isGFX9Plus(); 5494 5495 // GFX10 has 2 more SGPRs 104 and 105. 5496 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5497 return hasSGPR104_SGPR105(); 5498 5499 switch (RegNo) { 5500 case AMDGPU::SRC_SHARED_BASE: 5501 case AMDGPU::SRC_SHARED_LIMIT: 5502 case AMDGPU::SRC_PRIVATE_BASE: 5503 case AMDGPU::SRC_PRIVATE_LIMIT: 5504 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5505 return isGFX9Plus(); 5506 case AMDGPU::TBA: 5507 case AMDGPU::TBA_LO: 5508 case AMDGPU::TBA_HI: 5509 case AMDGPU::TMA: 5510 case AMDGPU::TMA_LO: 5511 case AMDGPU::TMA_HI: 5512 return !isGFX9Plus(); 5513 case AMDGPU::XNACK_MASK: 5514 case AMDGPU::XNACK_MASK_LO: 5515 case AMDGPU::XNACK_MASK_HI: 5516 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5517 case AMDGPU::SGPR_NULL: 5518 return isGFX10Plus(); 5519 default: 5520 break; 5521 } 5522 5523 if (isCI()) 5524 return true; 5525 5526 if (isSI() || isGFX10Plus()) { 5527 // No flat_scr on SI. 5528 // On GFX10 flat scratch is not a valid register operand and can only be 5529 // accessed with s_setreg/s_getreg.
5530 switch (RegNo) { 5531 case AMDGPU::FLAT_SCR: 5532 case AMDGPU::FLAT_SCR_LO: 5533 case AMDGPU::FLAT_SCR_HI: 5534 return false; 5535 default: 5536 return true; 5537 } 5538 } 5539 5540 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5541 // SI/CI have. 5542 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5543 return hasSGPR102_SGPR103(); 5544 5545 return true; 5546 } 5547 5548 OperandMatchResultTy 5549 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5550 OperandMode Mode) { 5551 // Try to parse with a custom parser 5552 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5553 5554 // If we successfully parsed the operand or if there was an error parsing, 5555 // we are done. 5556 // 5557 // If we are parsing after we reach EndOfStatement then this means we 5558 // are appending default values to the Operands list. This is only done 5559 // by a custom parser, so we shouldn't continue on to the generic parsing. 5560 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5561 isToken(AsmToken::EndOfStatement)) 5562 return ResTy; 5563 5564 SMLoc RBraceLoc; 5565 SMLoc LBraceLoc = getLoc(); 5566 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5567 unsigned Prefix = Operands.size(); 5568 5569 for (;;) { 5570 auto Loc = getLoc(); 5571 ResTy = parseReg(Operands); 5572 if (ResTy == MatchOperand_NoMatch) 5573 Error(Loc, "expected a register"); 5574 if (ResTy != MatchOperand_Success) 5575 return MatchOperand_ParseFail; 5576 5577 RBraceLoc = getLoc(); 5578 if (trySkipToken(AsmToken::RBrac)) 5579 break; 5580 5581 if (!skipToken(AsmToken::Comma, 5582 "expected a comma or a closing square bracket")) { 5583 return MatchOperand_ParseFail; 5584 } 5585 } 5586 5587 if (Operands.size() - Prefix > 1) { 5588 Operands.insert(Operands.begin() + Prefix, 5589 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5590 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5591 } 5592 5593 return MatchOperand_Success; 5594 } 5595 5596 return parseRegOrImm(Operands); 5597 } 5598 5599 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5600 // Clear any forced encodings from the previous instruction.
5601 setForcedEncodingSize(0); 5602 setForcedDPP(false); 5603 setForcedSDWA(false); 5604 5605 if (Name.endswith("_e64")) { 5606 setForcedEncodingSize(64); 5607 return Name.substr(0, Name.size() - 4); 5608 } else if (Name.endswith("_e32")) { 5609 setForcedEncodingSize(32); 5610 return Name.substr(0, Name.size() - 4); 5611 } else if (Name.endswith("_dpp")) { 5612 setForcedDPP(true); 5613 return Name.substr(0, Name.size() - 4); 5614 } else if (Name.endswith("_sdwa")) { 5615 setForcedSDWA(true); 5616 return Name.substr(0, Name.size() - 5); 5617 } 5618 return Name; 5619 } 5620 5621 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5622 StringRef Name, 5623 SMLoc NameLoc, OperandVector &Operands) { 5624 // Add the instruction mnemonic 5625 Name = parseMnemonicSuffix(Name); 5626 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5627 5628 bool IsMIMG = Name.startswith("image_"); 5629 5630 while (!trySkipToken(AsmToken::EndOfStatement)) { 5631 OperandMode Mode = OperandMode_Default; 5632 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5633 Mode = OperandMode_NSA; 5634 CPolSeen = 0; 5635 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5636 5637 if (Res != MatchOperand_Success) { 5638 checkUnsupportedInstruction(Name, NameLoc); 5639 if (!Parser.hasPendingError()) { 5640 // FIXME: use real operand location rather than the current location. 5641 StringRef Msg = 5642 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5643 "not a valid operand."; 5644 Error(getLoc(), Msg); 5645 } 5646 while (!trySkipToken(AsmToken::EndOfStatement)) { 5647 lex(); 5648 } 5649 return true; 5650 } 5651 5652 // Eat the comma or space if there is one. 5653 trySkipToken(AsmToken::Comma); 5654 } 5655 5656 return false; 5657 } 5658 5659 //===----------------------------------------------------------------------===// 5660 // Utility functions 5661 //===----------------------------------------------------------------------===// 5662 5663 OperandMatchResultTy 5664 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5665 5666 if (!trySkipId(Prefix, AsmToken::Colon)) 5667 return MatchOperand_NoMatch; 5668 5669 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5670 } 5671 5672 OperandMatchResultTy 5673 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5674 AMDGPUOperand::ImmTy ImmTy, 5675 bool (*ConvertResult)(int64_t&)) { 5676 SMLoc S = getLoc(); 5677 int64_t Value = 0; 5678 5679 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5680 if (Res != MatchOperand_Success) 5681 return Res; 5682 5683 if (ConvertResult && !ConvertResult(Value)) { 5684 Error(S, "invalid " + StringRef(Prefix) + " value."); 5685 } 5686 5687 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5688 return MatchOperand_Success; 5689 } 5690 5691 OperandMatchResultTy 5692 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5693 OperandVector &Operands, 5694 AMDGPUOperand::ImmTy ImmTy, 5695 bool (*ConvertResult)(int64_t&)) { 5696 SMLoc S = getLoc(); 5697 if (!trySkipId(Prefix, AsmToken::Colon)) 5698 return MatchOperand_NoMatch; 5699 5700 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5701 return MatchOperand_ParseFail; 5702 5703 unsigned Val = 0; 5704 const unsigned MaxSize = 4; 5705 5706 // FIXME: How to verify the number of elements matches the number of src 5707 // operands? 
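  // Illustrative input accepted by the loop below (the operand name is only
  // an example of a prefixed array, e.g. a VOP3P modifier):
  //   op_sel:[0,1,1,0]
  // Up to MaxSize comma-separated 0/1 values are packed into a bitmask.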
5708 for (int I = 0; ; ++I) { 5709 int64_t Op; 5710 SMLoc Loc = getLoc(); 5711 if (!parseExpr(Op)) 5712 return MatchOperand_ParseFail; 5713 5714 if (Op != 0 && Op != 1) { 5715 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5716 return MatchOperand_ParseFail; 5717 } 5718 5719 Val |= (Op << I); 5720 5721 if (trySkipToken(AsmToken::RBrac)) 5722 break; 5723 5724 if (I + 1 == MaxSize) { 5725 Error(getLoc(), "expected a closing square bracket"); 5726 return MatchOperand_ParseFail; 5727 } 5728 5729 if (!skipToken(AsmToken::Comma, "expected a comma")) 5730 return MatchOperand_ParseFail; 5731 } 5732 5733 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5734 return MatchOperand_Success; 5735 } 5736 5737 OperandMatchResultTy 5738 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5739 AMDGPUOperand::ImmTy ImmTy) { 5740 int64_t Bit; 5741 SMLoc S = getLoc(); 5742 5743 if (trySkipId(Name)) { 5744 Bit = 1; 5745 } else if (trySkipId("no", Name)) { 5746 Bit = 0; 5747 } else { 5748 return MatchOperand_NoMatch; 5749 } 5750 5751 if (Name == "r128" && !hasMIMG_R128()) { 5752 Error(S, "r128 modifier is not supported on this GPU"); 5753 return MatchOperand_ParseFail; 5754 } 5755 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5756 Error(S, "a16 modifier is not supported on this GPU"); 5757 return MatchOperand_ParseFail; 5758 } 5759 5760 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5761 ImmTy = AMDGPUOperand::ImmTyR128A16; 5762 5763 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5764 return MatchOperand_Success; 5765 } 5766 5767 OperandMatchResultTy 5768 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5769 unsigned CPolOn = 0; 5770 unsigned CPolOff = 0; 5771 SMLoc S = getLoc(); 5772 5773 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5774 if (isGFX940() && !Mnemo.startswith("s_")) { 5775 if (trySkipId("sc0")) 5776 CPolOn = AMDGPU::CPol::SC0; 5777 else if (trySkipId("nosc0")) 5778 CPolOff = AMDGPU::CPol::SC0; 5779 else if (trySkipId("nt")) 5780 CPolOn = AMDGPU::CPol::NT; 5781 else if (trySkipId("nont")) 5782 CPolOff = AMDGPU::CPol::NT; 5783 else if (trySkipId("sc1")) 5784 CPolOn = AMDGPU::CPol::SC1; 5785 else if (trySkipId("nosc1")) 5786 CPolOff = AMDGPU::CPol::SC1; 5787 else 5788 return MatchOperand_NoMatch; 5789 } 5790 else if (trySkipId("glc")) 5791 CPolOn = AMDGPU::CPol::GLC; 5792 else if (trySkipId("noglc")) 5793 CPolOff = AMDGPU::CPol::GLC; 5794 else if (trySkipId("slc")) 5795 CPolOn = AMDGPU::CPol::SLC; 5796 else if (trySkipId("noslc")) 5797 CPolOff = AMDGPU::CPol::SLC; 5798 else if (trySkipId("dlc")) 5799 CPolOn = AMDGPU::CPol::DLC; 5800 else if (trySkipId("nodlc")) 5801 CPolOff = AMDGPU::CPol::DLC; 5802 else if (trySkipId("scc")) 5803 CPolOn = AMDGPU::CPol::SCC; 5804 else if (trySkipId("noscc")) 5805 CPolOff = AMDGPU::CPol::SCC; 5806 else 5807 return MatchOperand_NoMatch; 5808 5809 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5810 Error(S, "dlc modifier is not supported on this GPU"); 5811 return MatchOperand_ParseFail; 5812 } 5813 5814 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5815 Error(S, "scc modifier is not supported on this GPU"); 5816 return MatchOperand_ParseFail; 5817 } 5818 5819 if (CPolSeen & (CPolOn | CPolOff)) { 5820 Error(S, "duplicate cache policy modifier"); 5821 return MatchOperand_ParseFail; 5822 } 5823 5824 CPolSeen |= (CPolOn | CPolOff); 5825 5826 for (unsigned I = 1; I != Operands.size(); ++I) { 5827 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5828 if (Op.isCPol()) { 5829 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5830 return MatchOperand_Success; 5831 } 5832 } 5833 5834 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5835 AMDGPUOperand::ImmTyCPol)); 5836 5837 return MatchOperand_Success; 5838 } 5839 5840 static void addOptionalImmOperand( 5841 MCInst& Inst, const OperandVector& Operands, 5842 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5843 AMDGPUOperand::ImmTy ImmT, 5844 int64_t Default = 0) { 5845 auto i = OptionalIdx.find(ImmT); 5846 if (i != OptionalIdx.end()) { 5847 unsigned Idx = i->second; 5848 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5849 } else { 5850 Inst.addOperand(MCOperand::createImm(Default)); 5851 } 5852 } 5853 5854 OperandMatchResultTy 5855 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5856 StringRef &Value, 5857 SMLoc &StringLoc) { 5858 if (!trySkipId(Prefix, AsmToken::Colon)) 5859 return MatchOperand_NoMatch; 5860 5861 StringLoc = getLoc(); 5862 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5863 : MatchOperand_ParseFail; 5864 } 5865 5866 //===----------------------------------------------------------------------===// 5867 // MTBUF format 5868 //===----------------------------------------------------------------------===// 5869 5870 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5871 int64_t MaxVal, 5872 int64_t &Fmt) { 5873 int64_t Val; 5874 SMLoc Loc = getLoc(); 5875 5876 auto Res = parseIntWithPrefix(Pref, Val); 5877 if (Res == MatchOperand_ParseFail) 5878 return false; 5879 if (Res == MatchOperand_NoMatch) 5880 return true; 5881 5882 if (Val < 0 || Val > MaxVal) { 5883 Error(Loc, Twine("out of range ", StringRef(Pref))); 5884 return false; 5885 } 5886 5887 Fmt = Val; 5888 return true; 5889 } 5890 5891 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5892 // values to live in a joint format operand in the MCInst encoding. 5893 OperandMatchResultTy 5894 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5895 using namespace llvm::AMDGPU::MTBUFFormat; 5896 5897 int64_t Dfmt = DFMT_UNDEF; 5898 int64_t Nfmt = NFMT_UNDEF; 5899 5900 // dfmt and nfmt can appear in either order, and each is optional. 5901 for (int I = 0; I < 2; ++I) { 5902 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5903 return MatchOperand_ParseFail; 5904 5905 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5906 return MatchOperand_ParseFail; 5907 } 5908 // Skip optional comma between dfmt/nfmt 5909 // but guard against 2 commas following each other. 5910 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5911 !peekToken().is(AsmToken::Comma)) { 5912 trySkipToken(AsmToken::Comma); 5913 } 5914 } 5915 5916 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5917 return MatchOperand_NoMatch; 5918 5919 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5920 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5921 5922 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5923 return MatchOperand_Success; 5924 } 5925 5926 OperandMatchResultTy 5927 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5928 using namespace llvm::AMDGPU::MTBUFFormat; 5929 5930 int64_t Fmt = UFMT_UNDEF; 5931 5932 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5933 return MatchOperand_ParseFail; 5934 5935 if (Fmt == UFMT_UNDEF) 5936 return MatchOperand_NoMatch; 5937 5938 Format = Fmt; 5939 return MatchOperand_Success; 5940 } 5941 5942 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5943 int64_t &Nfmt, 5944 StringRef FormatStr, 5945 SMLoc Loc) { 5946 using namespace llvm::AMDGPU::MTBUFFormat; 5947 int64_t Format; 5948 5949 Format = getDfmt(FormatStr); 5950 if (Format != DFMT_UNDEF) { 5951 Dfmt = Format; 5952 return true; 5953 } 5954 5955 Format = getNfmt(FormatStr, getSTI()); 5956 if (Format != NFMT_UNDEF) { 5957 Nfmt = Format; 5958 return true; 5959 } 5960 5961 Error(Loc, "unsupported format"); 5962 return false; 5963 } 5964 5965 OperandMatchResultTy 5966 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5967 SMLoc FormatLoc, 5968 int64_t &Format) { 5969 using namespace llvm::AMDGPU::MTBUFFormat; 5970 5971 int64_t Dfmt = DFMT_UNDEF; 5972 int64_t Nfmt = NFMT_UNDEF; 5973 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5974 return MatchOperand_ParseFail; 5975 5976 if (trySkipToken(AsmToken::Comma)) { 5977 StringRef Str; 5978 SMLoc Loc = getLoc(); 5979 if (!parseId(Str, "expected a format string") || 5980 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5981 return MatchOperand_ParseFail; 5982 } 5983 if (Dfmt == DFMT_UNDEF) { 5984 Error(Loc, "duplicate numeric format"); 5985 return MatchOperand_ParseFail; 5986 } else if (Nfmt == NFMT_UNDEF) { 5987 Error(Loc, "duplicate data format"); 5988 return MatchOperand_ParseFail; 5989 } 5990 } 5991 5992 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5993 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5994 5995 if (isGFX10Plus()) { 5996 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5997 if (Ufmt == UFMT_UNDEF) { 5998 Error(FormatLoc, "unsupported format"); 5999 return MatchOperand_ParseFail; 6000 } 6001 Format = Ufmt; 6002 } else { 6003 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6004 } 6005 6006 return MatchOperand_Success; 6007 } 6008 6009 OperandMatchResultTy 6010 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6011 SMLoc Loc, 6012 int64_t &Format) { 6013 using namespace llvm::AMDGPU::MTBUFFormat; 6014 6015 auto Id = getUnifiedFormat(FormatStr); 6016 if (Id == UFMT_UNDEF) 6017 return MatchOperand_NoMatch; 6018 6019 if (!isGFX10Plus()) { 6020 Error(Loc, "unified format is not supported on this GPU"); 6021 return MatchOperand_ParseFail; 6022 } 6023 6024 Format = Id; 6025 return MatchOperand_Success; 6026 } 6027 6028 OperandMatchResultTy 6029 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6030 using namespace llvm::AMDGPU::MTBUFFormat; 6031 SMLoc Loc = getLoc(); 6032 6033 if (!parseExpr(Format)) 6034 return MatchOperand_ParseFail; 6035 if (!isValidFormatEncoding(Format, getSTI())) { 6036 Error(Loc, "out of range format"); 6037 return MatchOperand_ParseFail; 6038 } 6039 6040 return MatchOperand_Success; 6041 } 6042 6043 OperandMatchResultTy 6044 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6045 using namespace llvm::AMDGPU::MTBUFFormat; 6046 6047 if (!trySkipId("format", AsmToken::Colon)) 6048 return MatchOperand_NoMatch; 6049 6050 if (trySkipToken(AsmToken::LBrac)) { 6051 StringRef FormatStr; 6052 SMLoc Loc = getLoc(); 6053 if (!parseId(FormatStr, "expected a format string")) 6054 return MatchOperand_ParseFail; 6055 6056 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6057 if (Res == MatchOperand_NoMatch) 6058 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6059 if (Res != MatchOperand_Success) 6060 return Res; 6061 6062 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6063 return MatchOperand_ParseFail; 6064 6065 return MatchOperand_Success; 6066 } 6067 6068 return parseNumericFormat(Format); 6069 } 6070 6071 OperandMatchResultTy 6072 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6073 using namespace llvm::AMDGPU::MTBUFFormat; 6074 6075 int64_t Format = getDefaultFormatEncoding(getSTI()); 6076 OperandMatchResultTy Res; 6077 SMLoc Loc = getLoc(); 6078 6079 // Parse legacy format syntax. 6080 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6081 if (Res == MatchOperand_ParseFail) 6082 return Res; 6083 6084 bool FormatFound = (Res == MatchOperand_Success); 6085 6086 Operands.push_back( 6087 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6088 6089 if (FormatFound) 6090 trySkipToken(AsmToken::Comma); 6091 6092 if (isToken(AsmToken::EndOfStatement)) { 6093 // We are expecting an soffset operand, 6094 // but let matcher handle the error. 6095 return MatchOperand_Success; 6096 } 6097 6098 // Parse soffset. 
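  // (Illustrative: the soffset parsed here may be a register or an immediate,
  // and a symbolic or numeric format, e.g. "format:[...]", may still follow
  // it if none was given earlier.)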
6099 Res = parseRegOrImm(Operands); 6100 if (Res != MatchOperand_Success) 6101 return Res; 6102 6103 trySkipToken(AsmToken::Comma); 6104 6105 if (!FormatFound) { 6106 Res = parseSymbolicOrNumericFormat(Format); 6107 if (Res == MatchOperand_ParseFail) 6108 return Res; 6109 if (Res == MatchOperand_Success) { 6110 auto Size = Operands.size(); 6111 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6112 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6113 Op.setImm(Format); 6114 } 6115 return MatchOperand_Success; 6116 } 6117 6118 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6119 Error(getLoc(), "duplicate format"); 6120 return MatchOperand_ParseFail; 6121 } 6122 return MatchOperand_Success; 6123 } 6124 6125 //===----------------------------------------------------------------------===// 6126 // ds 6127 //===----------------------------------------------------------------------===// 6128 6129 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6130 const OperandVector &Operands) { 6131 OptionalImmIndexMap OptionalIdx; 6132 6133 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6134 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6135 6136 // Add the register arguments 6137 if (Op.isReg()) { 6138 Op.addRegOperands(Inst, 1); 6139 continue; 6140 } 6141 6142 // Handle optional arguments 6143 OptionalIdx[Op.getImmTy()] = i; 6144 } 6145 6146 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6147 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6148 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6149 6150 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6151 } 6152 6153 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6154 bool IsGdsHardcoded) { 6155 OptionalImmIndexMap OptionalIdx; 6156 6157 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6158 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6159 6160 // Add the register arguments 6161 if (Op.isReg()) { 6162 Op.addRegOperands(Inst, 1); 6163 continue; 6164 } 6165 6166 if (Op.isToken() && Op.getToken() == "gds") { 6167 IsGdsHardcoded = true; 6168 continue; 6169 } 6170 6171 // Handle optional arguments 6172 OptionalIdx[Op.getImmTy()] = i; 6173 } 6174 6175 AMDGPUOperand::ImmTy OffsetType = 6176 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6177 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6178 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6179 AMDGPUOperand::ImmTyOffset; 6180 6181 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6182 6183 if (!IsGdsHardcoded) { 6184 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6185 } 6186 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6187 } 6188 6189 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6190 OptionalImmIndexMap OptionalIdx; 6191 6192 unsigned OperandIdx[4]; 6193 unsigned EnMask = 0; 6194 int SrcIdx = 0; 6195 6196 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6197 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6198 6199 // Add the register arguments 6200 if (Op.isReg()) { 6201 assert(SrcIdx < 4); 6202 OperandIdx[SrcIdx] = Inst.size(); 6203 Op.addRegOperands(Inst, 1); 6204 ++SrcIdx; 6205 continue; 6206 } 6207 6208 if (Op.isOff()) { 6209 assert(SrcIdx < 4); 6210 OperandIdx[SrcIdx] = Inst.size(); 6211 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6212 ++SrcIdx; 6213 continue; 6214 } 6215 6216 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6217 Op.addImmOperands(Inst, 1); 6218 continue; 6219 } 6220 6221 if (Op.isToken() && Op.getToken() == "done") 6222 continue; 6223 6224 // Handle optional arguments 6225 OptionalIdx[Op.getImmTy()] = i; 6226 } 6227 6228 assert(SrcIdx == 4); 6229 6230 bool Compr = false; 6231 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6232 Compr = true; 6233 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6234 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6235 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6236 } 6237 6238 for (auto i = 0; i < SrcIdx; ++i) { 6239 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6240 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6241 } 6242 } 6243 6244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6246 6247 Inst.addOperand(MCOperand::createImm(EnMask)); 6248 } 6249 6250 //===----------------------------------------------------------------------===// 6251 // s_waitcnt 6252 //===----------------------------------------------------------------------===// 6253 6254 static bool 6255 encodeCnt( 6256 const AMDGPU::IsaVersion ISA, 6257 int64_t &IntVal, 6258 int64_t CntVal, 6259 bool Saturate, 6260 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6261 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6262 { 6263 bool Failed = false; 6264 6265 IntVal = encode(ISA, IntVal, CntVal); 6266 if (CntVal != decode(ISA, IntVal)) { 6267 if (Saturate) { 6268 IntVal = encode(ISA, IntVal, -1); 6269 } else { 6270 Failed = true; 6271 } 6272 } 6273 return Failed; 6274 } 6275 6276 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6277 6278 SMLoc CntLoc = getLoc(); 6279 StringRef CntName = getTokenStr(); 6280 6281 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6282 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6283 return false; 6284 6285 int64_t CntVal; 6286 SMLoc ValLoc = getLoc(); 6287 if (!parseExpr(CntVal)) 6288 return false; 6289 6290 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6291 6292 bool Failed = true; 6293 bool Sat = CntName.endswith("_sat"); 6294 6295 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6296 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6297 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6298 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6299 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6300 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6301 } else { 6302 Error(CntLoc, "invalid counter name " + CntName); 6303 return false; 6304 } 6305 6306 if (Failed) { 6307 Error(ValLoc, "too large value for " + CntName); 6308 return false; 6309 } 6310 6311 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6312 return false; 6313 6314 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6315 if (isToken(AsmToken::EndOfStatement)) { 6316 Error(getLoc(), "expected a counter name"); 6317 return false; 6318 } 6319 } 6320 6321 return true; 6322 } 6323 6324 OperandMatchResultTy 6325 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6326 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6327 int64_t Waitcnt = getWaitcntBitMask(ISA); 6328 SMLoc S = getLoc(); 6329 6330 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6331 while (!isToken(AsmToken::EndOfStatement)) { 6332 if (!parseCnt(Waitcnt)) 6333 return MatchOperand_ParseFail; 6334 } 6335 } else { 6336 if (!parseExpr(Waitcnt)) 6337 return MatchOperand_ParseFail; 6338 } 6339 6340 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6341 return MatchOperand_Success; 6342 } 6343 6344 bool 6345 AMDGPUOperand::isSWaitCnt() const { 6346 return isImm(); 6347 } 6348 6349 //===----------------------------------------------------------------------===// 6350 // DepCtr 6351 //===----------------------------------------------------------------------===// 6352 6353 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6354 StringRef DepCtrName) { 6355 switch 
(ErrorId) { 6356 case OPR_ID_UNKNOWN: 6357 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6358 return; 6359 case OPR_ID_UNSUPPORTED: 6360 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6361 return; 6362 case OPR_ID_DUPLICATE: 6363 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6364 return; 6365 case OPR_VAL_INVALID: 6366 Error(Loc, Twine("invalid value for ", DepCtrName)); 6367 return; 6368 default: 6369 assert(false); 6370 } 6371 } 6372 6373 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6374 6375 using namespace llvm::AMDGPU::DepCtr; 6376 6377 SMLoc DepCtrLoc = getLoc(); 6378 StringRef DepCtrName = getTokenStr(); 6379 6380 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6381 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6382 return false; 6383 6384 int64_t ExprVal; 6385 if (!parseExpr(ExprVal)) 6386 return false; 6387 6388 unsigned PrevOprMask = UsedOprMask; 6389 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6390 6391 if (CntVal < 0) { 6392 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6393 return false; 6394 } 6395 6396 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6397 return false; 6398 6399 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6400 if (isToken(AsmToken::EndOfStatement)) { 6401 Error(getLoc(), "expected a counter name"); 6402 return false; 6403 } 6404 } 6405 6406 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6407 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6408 return true; 6409 } 6410 6411 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6412 using namespace llvm::AMDGPU::DepCtr; 6413 6414 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6415 SMLoc Loc = getLoc(); 6416 6417 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6418 unsigned UsedOprMask = 0; 6419 while (!isToken(AsmToken::EndOfStatement)) { 6420 if (!parseDepCtr(DepCtr, UsedOprMask)) 6421 return MatchOperand_ParseFail; 6422 } 6423 } else { 6424 if (!parseExpr(DepCtr)) 6425 return MatchOperand_ParseFail; 6426 } 6427 6428 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6429 return MatchOperand_Success; 6430 } 6431 6432 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6433 6434 //===----------------------------------------------------------------------===// 6435 // hwreg 6436 //===----------------------------------------------------------------------===// 6437 6438 bool 6439 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6440 OperandInfoTy &Offset, 6441 OperandInfoTy &Width) { 6442 using namespace llvm::AMDGPU::Hwreg; 6443 6444 // The register may be specified by name or using a numeric code 6445 HwReg.Loc = getLoc(); 6446 if (isToken(AsmToken::Identifier) && 6447 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6448 HwReg.IsSymbolic = true; 6449 lex(); // skip register name 6450 } else if (!parseExpr(HwReg.Id, "a register name")) { 6451 return false; 6452 } 6453 6454 if (trySkipToken(AsmToken::RParen)) 6455 return true; 6456 6457 // parse optional params 6458 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6459 return false; 6460 6461 Offset.Loc = getLoc(); 6462 if (!parseExpr(Offset.Id)) 6463 return false; 6464 6465 if (!skipToken(AsmToken::Comma, "expected a comma")) 6466 return false; 6467 6468 Width.Loc = getLoc(); 6469 return parseExpr(Width.Id) && 6470 skipToken(AsmToken::RParen, "expected a closing 
parenthesis"); 6471 } 6472 6473 bool 6474 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6475 const OperandInfoTy &Offset, 6476 const OperandInfoTy &Width) { 6477 6478 using namespace llvm::AMDGPU::Hwreg; 6479 6480 if (HwReg.IsSymbolic) { 6481 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6482 Error(HwReg.Loc, 6483 "specified hardware register is not supported on this GPU"); 6484 return false; 6485 } 6486 } else { 6487 if (!isValidHwreg(HwReg.Id)) { 6488 Error(HwReg.Loc, 6489 "invalid code of hardware register: only 6-bit values are legal"); 6490 return false; 6491 } 6492 } 6493 if (!isValidHwregOffset(Offset.Id)) { 6494 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6495 return false; 6496 } 6497 if (!isValidHwregWidth(Width.Id)) { 6498 Error(Width.Loc, 6499 "invalid bitfield width: only values from 1 to 32 are legal"); 6500 return false; 6501 } 6502 return true; 6503 } 6504 6505 OperandMatchResultTy 6506 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6507 using namespace llvm::AMDGPU::Hwreg; 6508 6509 int64_t ImmVal = 0; 6510 SMLoc Loc = getLoc(); 6511 6512 if (trySkipId("hwreg", AsmToken::LParen)) { 6513 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6514 OperandInfoTy Offset(OFFSET_DEFAULT_); 6515 OperandInfoTy Width(WIDTH_DEFAULT_); 6516 if (parseHwregBody(HwReg, Offset, Width) && 6517 validateHwreg(HwReg, Offset, Width)) { 6518 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6519 } else { 6520 return MatchOperand_ParseFail; 6521 } 6522 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6523 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6524 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6525 return MatchOperand_ParseFail; 6526 } 6527 } else { 6528 return MatchOperand_ParseFail; 6529 } 6530 6531 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6532 return MatchOperand_Success; 6533 } 6534 6535 bool AMDGPUOperand::isHwreg() const { 6536 return isImmTy(ImmTyHwreg); 6537 } 6538 6539 //===----------------------------------------------------------------------===// 6540 // sendmsg 6541 //===----------------------------------------------------------------------===// 6542 6543 bool 6544 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6545 OperandInfoTy &Op, 6546 OperandInfoTy &Stream) { 6547 using namespace llvm::AMDGPU::SendMsg; 6548 6549 Msg.Loc = getLoc(); 6550 if (isToken(AsmToken::Identifier) && 6551 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6552 Msg.IsSymbolic = true; 6553 lex(); // skip message name 6554 } else if (!parseExpr(Msg.Id, "a message name")) { 6555 return false; 6556 } 6557 6558 if (trySkipToken(AsmToken::Comma)) { 6559 Op.IsDefined = true; 6560 Op.Loc = getLoc(); 6561 if (isToken(AsmToken::Identifier) && 6562 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6563 lex(); // skip operation name 6564 } else if (!parseExpr(Op.Id, "an operation name")) { 6565 return false; 6566 } 6567 6568 if (trySkipToken(AsmToken::Comma)) { 6569 Stream.IsDefined = true; 6570 Stream.Loc = getLoc(); 6571 if (!parseExpr(Stream.Id)) 6572 return false; 6573 } 6574 } 6575 6576 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6577 } 6578 6579 bool 6580 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6581 const OperandInfoTy &Op, 6582 const OperandInfoTy &Stream) { 6583 using namespace llvm::AMDGPU::SendMsg; 6584 6585 // Validation strictness depends on whether message is specified 6586 // in a symbolic or in a numeric form. 
In the latter case 6587 // only encoding possibility is checked. 6588 bool Strict = Msg.IsSymbolic; 6589 6590 if (Strict) { 6591 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6592 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6593 return false; 6594 } 6595 } else { 6596 if (!isValidMsgId(Msg.Id)) { 6597 Error(Msg.Loc, "invalid message id"); 6598 return false; 6599 } 6600 } 6601 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6602 if (Op.IsDefined) { 6603 Error(Op.Loc, "message does not support operations"); 6604 } else { 6605 Error(Msg.Loc, "missing message operation"); 6606 } 6607 return false; 6608 } 6609 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6610 Error(Op.Loc, "invalid operation id"); 6611 return false; 6612 } 6613 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6614 Error(Stream.Loc, "message operation does not support streams"); 6615 return false; 6616 } 6617 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6618 Error(Stream.Loc, "invalid message stream id"); 6619 return false; 6620 } 6621 return true; 6622 } 6623 6624 OperandMatchResultTy 6625 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6626 using namespace llvm::AMDGPU::SendMsg; 6627 6628 int64_t ImmVal = 0; 6629 SMLoc Loc = getLoc(); 6630 6631 if (trySkipId("sendmsg", AsmToken::LParen)) { 6632 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6633 OperandInfoTy Op(OP_NONE_); 6634 OperandInfoTy Stream(STREAM_ID_NONE_); 6635 if (parseSendMsgBody(Msg, Op, Stream) && 6636 validateSendMsg(Msg, Op, Stream)) { 6637 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6638 } else { 6639 return MatchOperand_ParseFail; 6640 } 6641 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6642 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6643 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6644 return MatchOperand_ParseFail; 6645 } 6646 } else { 6647 return MatchOperand_ParseFail; 6648 } 6649 6650 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6651 return MatchOperand_Success; 6652 } 6653 6654 bool AMDGPUOperand::isSendMsg() const { 6655 return isImmTy(ImmTySendMsg); 6656 } 6657 6658 //===----------------------------------------------------------------------===// 6659 // v_interp 6660 //===----------------------------------------------------------------------===// 6661 6662 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6663 StringRef Str; 6664 SMLoc S = getLoc(); 6665 6666 if (!parseId(Str)) 6667 return MatchOperand_NoMatch; 6668 6669 int Slot = StringSwitch<int>(Str) 6670 .Case("p10", 0) 6671 .Case("p20", 1) 6672 .Case("p0", 2) 6673 .Default(-1); 6674 6675 if (Slot == -1) { 6676 Error(S, "invalid interpolation slot"); 6677 return MatchOperand_ParseFail; 6678 } 6679 6680 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6681 AMDGPUOperand::ImmTyInterpSlot)); 6682 return MatchOperand_Success; 6683 } 6684 6685 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6686 StringRef Str; 6687 SMLoc S = getLoc(); 6688 6689 if (!parseId(Str)) 6690 return MatchOperand_NoMatch; 6691 6692 if (!Str.startswith("attr")) { 6693 Error(S, "invalid interpolation attribute"); 6694 return MatchOperand_ParseFail; 6695 } 6696 6697 StringRef Chan = Str.take_back(2); 6698 int AttrChan = StringSwitch<int>(Chan) 6699 .Case(".x", 0) 6700 .Case(".y", 1) 6701 .Case(".z", 2) 6702 .Case(".w", 3) 6703 .Default(-1); 6704 if (AttrChan == -1) { 6705 Error(S, "invalid or missing 
interpolation attribute channel"); 6706 return MatchOperand_ParseFail; 6707 } 6708 6709 Str = Str.drop_back(2).drop_front(4); 6710 6711 uint8_t Attr; 6712 if (Str.getAsInteger(10, Attr)) { 6713 Error(S, "invalid or missing interpolation attribute number"); 6714 return MatchOperand_ParseFail; 6715 } 6716 6717 if (Attr > 63) { 6718 Error(S, "out of bounds interpolation attribute number"); 6719 return MatchOperand_ParseFail; 6720 } 6721 6722 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6723 6724 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6725 AMDGPUOperand::ImmTyInterpAttr)); 6726 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6727 AMDGPUOperand::ImmTyAttrChan)); 6728 return MatchOperand_Success; 6729 } 6730 6731 //===----------------------------------------------------------------------===// 6732 // exp 6733 //===----------------------------------------------------------------------===// 6734 6735 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6736 using namespace llvm::AMDGPU::Exp; 6737 6738 StringRef Str; 6739 SMLoc S = getLoc(); 6740 6741 if (!parseId(Str)) 6742 return MatchOperand_NoMatch; 6743 6744 unsigned Id = getTgtId(Str); 6745 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6746 Error(S, (Id == ET_INVALID) ? 6747 "invalid exp target" : 6748 "exp target is not supported on this GPU"); 6749 return MatchOperand_ParseFail; 6750 } 6751 6752 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6753 AMDGPUOperand::ImmTyExpTgt)); 6754 return MatchOperand_Success; 6755 } 6756 6757 //===----------------------------------------------------------------------===// 6758 // parser helpers 6759 //===----------------------------------------------------------------------===// 6760 6761 bool 6762 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6763 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6764 } 6765 6766 bool 6767 AMDGPUAsmParser::isId(const StringRef Id) const { 6768 return isId(getToken(), Id); 6769 } 6770 6771 bool 6772 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6773 return getTokenKind() == Kind; 6774 } 6775 6776 bool 6777 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6778 if (isId(Id)) { 6779 lex(); 6780 return true; 6781 } 6782 return false; 6783 } 6784 6785 bool 6786 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6787 if (isToken(AsmToken::Identifier)) { 6788 StringRef Tok = getTokenStr(); 6789 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6790 lex(); 6791 return true; 6792 } 6793 } 6794 return false; 6795 } 6796 6797 bool 6798 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6799 if (isId(Id) && peekToken().is(Kind)) { 6800 lex(); 6801 lex(); 6802 return true; 6803 } 6804 return false; 6805 } 6806 6807 bool 6808 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6809 if (isToken(Kind)) { 6810 lex(); 6811 return true; 6812 } 6813 return false; 6814 } 6815 6816 bool 6817 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6818 const StringRef ErrMsg) { 6819 if (!trySkipToken(Kind)) { 6820 Error(getLoc(), ErrMsg); 6821 return false; 6822 } 6823 return true; 6824 } 6825 6826 bool 6827 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6828 SMLoc S = getLoc(); 6829 6830 const MCExpr *Expr; 6831 if (Parser.parseExpression(Expr)) 6832 return false; 6833 6834 if (Expr->evaluateAsAbsolute(Imm)) 6835 return true; 6836 6837 if 
(Expected.empty()) { 6838 Error(S, "expected absolute expression"); 6839 } else { 6840 Error(S, Twine("expected ", Expected) + 6841 Twine(" or an absolute expression")); 6842 } 6843 return false; 6844 } 6845 6846 bool 6847 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6848 SMLoc S = getLoc(); 6849 6850 const MCExpr *Expr; 6851 if (Parser.parseExpression(Expr)) 6852 return false; 6853 6854 int64_t IntVal; 6855 if (Expr->evaluateAsAbsolute(IntVal)) { 6856 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6857 } else { 6858 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6859 } 6860 return true; 6861 } 6862 6863 bool 6864 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6865 if (isToken(AsmToken::String)) { 6866 Val = getToken().getStringContents(); 6867 lex(); 6868 return true; 6869 } else { 6870 Error(getLoc(), ErrMsg); 6871 return false; 6872 } 6873 } 6874 6875 bool 6876 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6877 if (isToken(AsmToken::Identifier)) { 6878 Val = getTokenStr(); 6879 lex(); 6880 return true; 6881 } else { 6882 if (!ErrMsg.empty()) 6883 Error(getLoc(), ErrMsg); 6884 return false; 6885 } 6886 } 6887 6888 AsmToken 6889 AMDGPUAsmParser::getToken() const { 6890 return Parser.getTok(); 6891 } 6892 6893 AsmToken 6894 AMDGPUAsmParser::peekToken() { 6895 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6896 } 6897 6898 void 6899 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6900 auto TokCount = getLexer().peekTokens(Tokens); 6901 6902 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6903 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6904 } 6905 6906 AsmToken::TokenKind 6907 AMDGPUAsmParser::getTokenKind() const { 6908 return getLexer().getKind(); 6909 } 6910 6911 SMLoc 6912 AMDGPUAsmParser::getLoc() const { 6913 return getToken().getLoc(); 6914 } 6915 6916 StringRef 6917 AMDGPUAsmParser::getTokenStr() const { 6918 return getToken().getString(); 6919 } 6920 6921 void 6922 AMDGPUAsmParser::lex() { 6923 Parser.Lex(); 6924 } 6925 6926 SMLoc 6927 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6928 const OperandVector &Operands) const { 6929 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6930 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6931 if (Test(Op)) 6932 return Op.getStartLoc(); 6933 } 6934 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6935 } 6936 6937 SMLoc 6938 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6939 const OperandVector &Operands) const { 6940 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6941 return getOperandLoc(Test, Operands); 6942 } 6943 6944 SMLoc 6945 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6946 const OperandVector &Operands) const { 6947 auto Test = [=](const AMDGPUOperand& Op) { 6948 return Op.isRegKind() && Op.getReg() == Reg; 6949 }; 6950 return getOperandLoc(Test, Operands); 6951 } 6952 6953 SMLoc 6954 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6955 auto Test = [](const AMDGPUOperand& Op) { 6956 return Op.IsImmKindLiteral() || Op.isExpr(); 6957 }; 6958 return getOperandLoc(Test, Operands); 6959 } 6960 6961 SMLoc 6962 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6963 auto Test = [](const AMDGPUOperand& Op) { 6964 return Op.isImmKindConst(); 6965 }; 6966 return getOperandLoc(Test, Operands); 6967 } 6968 6969 //===----------------------------------------------------------------------===// 6970 // 
swizzle 6971 //===----------------------------------------------------------------------===// 6972 6973 LLVM_READNONE 6974 static unsigned 6975 encodeBitmaskPerm(const unsigned AndMask, 6976 const unsigned OrMask, 6977 const unsigned XorMask) { 6978 using namespace llvm::AMDGPU::Swizzle; 6979 6980 return BITMASK_PERM_ENC | 6981 (AndMask << BITMASK_AND_SHIFT) | 6982 (OrMask << BITMASK_OR_SHIFT) | 6983 (XorMask << BITMASK_XOR_SHIFT); 6984 } 6985 6986 bool 6987 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6988 const unsigned MinVal, 6989 const unsigned MaxVal, 6990 const StringRef ErrMsg, 6991 SMLoc &Loc) { 6992 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6993 return false; 6994 } 6995 Loc = getLoc(); 6996 if (!parseExpr(Op)) { 6997 return false; 6998 } 6999 if (Op < MinVal || Op > MaxVal) { 7000 Error(Loc, ErrMsg); 7001 return false; 7002 } 7003 7004 return true; 7005 } 7006 7007 bool 7008 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7009 const unsigned MinVal, 7010 const unsigned MaxVal, 7011 const StringRef ErrMsg) { 7012 SMLoc Loc; 7013 for (unsigned i = 0; i < OpNum; ++i) { 7014 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7015 return false; 7016 } 7017 7018 return true; 7019 } 7020 7021 bool 7022 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7023 using namespace llvm::AMDGPU::Swizzle; 7024 7025 int64_t Lane[LANE_NUM]; 7026 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7027 "expected a 2-bit lane id")) { 7028 Imm = QUAD_PERM_ENC; 7029 for (unsigned I = 0; I < LANE_NUM; ++I) { 7030 Imm |= Lane[I] << (LANE_SHIFT * I); 7031 } 7032 return true; 7033 } 7034 return false; 7035 } 7036 7037 bool 7038 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7039 using namespace llvm::AMDGPU::Swizzle; 7040 7041 SMLoc Loc; 7042 int64_t GroupSize; 7043 int64_t LaneIdx; 7044 7045 if (!parseSwizzleOperand(GroupSize, 7046 2, 32, 7047 "group size must be in the interval [2,32]", 7048 Loc)) { 7049 return false; 7050 } 7051 if (!isPowerOf2_64(GroupSize)) { 7052 Error(Loc, "group size must be a power of two"); 7053 return false; 7054 } 7055 if (parseSwizzleOperand(LaneIdx, 7056 0, GroupSize - 1, 7057 "lane id must be in the interval [0,group size - 1]", 7058 Loc)) { 7059 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7060 return true; 7061 } 7062 return false; 7063 } 7064 7065 bool 7066 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7067 using namespace llvm::AMDGPU::Swizzle; 7068 7069 SMLoc Loc; 7070 int64_t GroupSize; 7071 7072 if (!parseSwizzleOperand(GroupSize, 7073 2, 32, 7074 "group size must be in the interval [2,32]", 7075 Loc)) { 7076 return false; 7077 } 7078 if (!isPowerOf2_64(GroupSize)) { 7079 Error(Loc, "group size must be a power of two"); 7080 return false; 7081 } 7082 7083 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7084 return true; 7085 } 7086 7087 bool 7088 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7089 using namespace llvm::AMDGPU::Swizzle; 7090 7091 SMLoc Loc; 7092 int64_t GroupSize; 7093 7094 if (!parseSwizzleOperand(GroupSize, 7095 1, 16, 7096 "group size must be in the interval [1,16]", 7097 Loc)) { 7098 return false; 7099 } 7100 if (!isPowerOf2_64(GroupSize)) { 7101 Error(Loc, "group size must be a power of two"); 7102 return false; 7103 } 7104 7105 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7106 return true; 7107 } 7108 7109 bool 7110 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7111 using namespace llvm::AMDGPU::Swizzle; 7112 7113 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7114 return false; 7115 } 7116 7117 StringRef Ctl; 7118 SMLoc StrLoc = getLoc(); 7119 if (!parseString(Ctl)) { 7120 return false; 7121 } 7122 if (Ctl.size() != BITMASK_WIDTH) { 7123 Error(StrLoc, "expected a 5-character mask"); 7124 return false; 7125 } 7126 7127 unsigned AndMask = 0; 7128 unsigned OrMask = 0; 7129 unsigned XorMask = 0; 7130 7131 for (size_t i = 0; i < Ctl.size(); ++i) { 7132 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7133 switch(Ctl[i]) { 7134 default: 7135 Error(StrLoc, "invalid mask"); 7136 return false; 7137 case '0': 7138 break; 7139 case '1': 7140 OrMask |= Mask; 7141 break; 7142 case 'p': 7143 AndMask |= Mask; 7144 break; 7145 case 'i': 7146 AndMask |= Mask; 7147 XorMask |= Mask; 7148 break; 7149 } 7150 } 7151 7152 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7153 return true; 7154 } 7155 7156 bool 7157 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7158 7159 SMLoc OffsetLoc = getLoc(); 7160 7161 if (!parseExpr(Imm, "a swizzle macro")) { 7162 return false; 7163 } 7164 if (!isUInt<16>(Imm)) { 7165 Error(OffsetLoc, "expected a 16-bit offset"); 7166 return false; 7167 } 7168 return true; 7169 } 7170 7171 bool 7172 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7173 using namespace llvm::AMDGPU::Swizzle; 7174 7175 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7176 7177 SMLoc ModeLoc = getLoc(); 7178 bool Ok = false; 7179 7180 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7181 Ok = parseSwizzleQuadPerm(Imm); 7182 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7183 Ok = parseSwizzleBitmaskPerm(Imm); 7184 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7185 Ok = parseSwizzleBroadcast(Imm); 7186 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7187 Ok = parseSwizzleSwap(Imm); 7188 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7189 Ok = parseSwizzleReverse(Imm); 7190 } else { 7191 Error(ModeLoc, "expected a swizzle mode"); 7192 } 7193 7194 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7195 } 7196 7197 return false; 7198 } 7199 7200 OperandMatchResultTy 7201 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7202 SMLoc S = getLoc(); 7203 int64_t Imm = 0; 7204 7205 if (trySkipId("offset")) { 7206 7207 bool Ok = false; 7208 if (skipToken(AsmToken::Colon, "expected a colon")) { 7209 if (trySkipId("swizzle")) { 7210 Ok = parseSwizzleMacro(Imm); 7211 } else { 7212 Ok = parseSwizzleOffset(Imm); 7213 } 7214 } 7215 7216 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7217 7218 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7219 } else { 7220 // Swizzle "offset" operand is optional. 7221 // If it is omitted, try parsing other optional operands.
7222 return parseOptionalOpr(Operands); 7223 } 7224 } 7225 7226 bool 7227 AMDGPUOperand::isSwizzle() const { 7228 return isImmTy(ImmTySwizzle); 7229 } 7230 7231 //===----------------------------------------------------------------------===// 7232 // VGPR Index Mode 7233 //===----------------------------------------------------------------------===// 7234 7235 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7236 7237 using namespace llvm::AMDGPU::VGPRIndexMode; 7238 7239 if (trySkipToken(AsmToken::RParen)) { 7240 return OFF; 7241 } 7242 7243 int64_t Imm = 0; 7244 7245 while (true) { 7246 unsigned Mode = 0; 7247 SMLoc S = getLoc(); 7248 7249 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7250 if (trySkipId(IdSymbolic[ModeId])) { 7251 Mode = 1 << ModeId; 7252 break; 7253 } 7254 } 7255 7256 if (Mode == 0) { 7257 Error(S, (Imm == 0)? 7258 "expected a VGPR index mode or a closing parenthesis" : 7259 "expected a VGPR index mode"); 7260 return UNDEF; 7261 } 7262 7263 if (Imm & Mode) { 7264 Error(S, "duplicate VGPR index mode"); 7265 return UNDEF; 7266 } 7267 Imm |= Mode; 7268 7269 if (trySkipToken(AsmToken::RParen)) 7270 break; 7271 if (!skipToken(AsmToken::Comma, 7272 "expected a comma or a closing parenthesis")) 7273 return UNDEF; 7274 } 7275 7276 return Imm; 7277 } 7278 7279 OperandMatchResultTy 7280 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7281 7282 using namespace llvm::AMDGPU::VGPRIndexMode; 7283 7284 int64_t Imm = 0; 7285 SMLoc S = getLoc(); 7286 7287 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7288 Imm = parseGPRIdxMacro(); 7289 if (Imm == UNDEF) 7290 return MatchOperand_ParseFail; 7291 } else { 7292 if (getParser().parseAbsoluteExpression(Imm)) 7293 return MatchOperand_ParseFail; 7294 if (Imm < 0 || !isUInt<4>(Imm)) { 7295 Error(S, "invalid immediate: only 4-bit values are legal"); 7296 return MatchOperand_ParseFail; 7297 } 7298 } 7299 7300 Operands.push_back( 7301 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7302 return MatchOperand_Success; 7303 } 7304 7305 bool AMDGPUOperand::isGPRIdxMode() const { 7306 return isImmTy(ImmTyGprIdxMode); 7307 } 7308 7309 //===----------------------------------------------------------------------===// 7310 // sopp branch targets 7311 //===----------------------------------------------------------------------===// 7312 7313 OperandMatchResultTy 7314 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7315 7316 // Make sure we are not parsing something 7317 // that looks like a label or an expression but is not. 7318 // This will improve error messages. 7319 if (isRegister() || isModifier()) 7320 return MatchOperand_NoMatch; 7321 7322 if (!parseExpr(Operands)) 7323 return MatchOperand_ParseFail; 7324 7325 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7326 assert(Opr.isImm() || Opr.isExpr()); 7327 SMLoc Loc = Opr.getStartLoc(); 7328 7329 // Currently we do not support arbitrary expressions as branch targets. 7330 // Only labels and absolute expressions are accepted. 
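  // For illustration (a sketch, not an exhaustive list): a plain label such as
  //   s_branch loop_end
  // is parsed as a symbol reference, and an absolute expression such as
  //   s_branch 16
  // must fit in a signed 16-bit field; composite expressions like
  // "loop_end+4" are diagnosed by the checks below.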
7331 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7332 Error(Loc, "expected an absolute expression or a label"); 7333 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7334 Error(Loc, "expected a 16-bit signed jump offset"); 7335 } 7336 7337 return MatchOperand_Success; 7338 } 7339 7340 //===----------------------------------------------------------------------===// 7341 // Boolean holding registers 7342 //===----------------------------------------------------------------------===// 7343 7344 OperandMatchResultTy 7345 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7346 return parseReg(Operands); 7347 } 7348 7349 //===----------------------------------------------------------------------===// 7350 // mubuf 7351 //===----------------------------------------------------------------------===// 7352 7353 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7354 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7355 } 7356 7357 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7358 const OperandVector &Operands, 7359 bool IsAtomic, 7360 bool IsLds) { 7361 OptionalImmIndexMap OptionalIdx; 7362 unsigned FirstOperandIdx = 1; 7363 bool IsAtomicReturn = false; 7364 7365 if (IsAtomic) { 7366 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7367 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7368 if (!Op.isCPol()) 7369 continue; 7370 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7371 break; 7372 } 7373 7374 if (!IsAtomicReturn) { 7375 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7376 if (NewOpc != -1) 7377 Inst.setOpcode(NewOpc); 7378 } 7379 7380 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7381 SIInstrFlags::IsAtomicRet; 7382 } 7383 7384 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7385 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7386 7387 // Add the register arguments 7388 if (Op.isReg()) { 7389 Op.addRegOperands(Inst, 1); 7390 // Insert a tied src for atomic return dst. 7391 // This cannot be postponed as subsequent calls to 7392 // addImmOperands rely on correct number of MC operands. 7393 if (IsAtomicReturn && i == FirstOperandIdx) 7394 Op.addRegOperands(Inst, 1); 7395 continue; 7396 } 7397 7398 // Handle the case where soffset is an immediate 7399 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7400 Op.addImmOperands(Inst, 1); 7401 continue; 7402 } 7403 7404 // Handle tokens like 'offen' which are sometimes hard-coded into the 7405 // asm string. There are no MCInst operands for these. 
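    // As a sketch (illustrative syntax only): for something like
    //   buffer_load_dword v0, v1, s[4:7], s0 offen offset:16
    // the registers are added directly, "offen" is such a hard-coded token
    // with no MCInst operand, and "offset:16" is recorded in OptionalIdx and
    // emitted later by addOptionalImmOperand().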
7406 if (Op.isToken()) { 7407 continue; 7408 } 7409 assert(Op.isImm()); 7410 7411 // Handle optional arguments 7412 OptionalIdx[Op.getImmTy()] = i; 7413 } 7414 7415 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7417 7418 if (!IsLds) { // tfe is not legal with lds opcodes 7419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7420 } 7421 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7422 } 7423 7424 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7425 OptionalImmIndexMap OptionalIdx; 7426 7427 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7428 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7429 7430 // Add the register arguments 7431 if (Op.isReg()) { 7432 Op.addRegOperands(Inst, 1); 7433 continue; 7434 } 7435 7436 // Handle the case where soffset is an immediate 7437 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7438 Op.addImmOperands(Inst, 1); 7439 continue; 7440 } 7441 7442 // Handle tokens like 'offen' which are sometimes hard-coded into the 7443 // asm string. There are no MCInst operands for these. 7444 if (Op.isToken()) { 7445 continue; 7446 } 7447 assert(Op.isImm()); 7448 7449 // Handle optional arguments 7450 OptionalIdx[Op.getImmTy()] = i; 7451 } 7452 7453 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7454 AMDGPUOperand::ImmTyOffset); 7455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7456 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7459 } 7460 7461 //===----------------------------------------------------------------------===// 7462 // mimg 7463 //===----------------------------------------------------------------------===// 7464 7465 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7466 bool IsAtomic) { 7467 unsigned I = 1; 7468 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7469 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7470 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7471 } 7472 7473 if (IsAtomic) { 7474 // Add src, same as dst 7475 assert(Desc.getNumDefs() == 1); 7476 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7477 } 7478 7479 OptionalImmIndexMap OptionalIdx; 7480 7481 for (unsigned E = Operands.size(); I != E; ++I) { 7482 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7483 7484 // Add the register arguments 7485 if (Op.isReg()) { 7486 Op.addRegOperands(Inst, 1); 7487 } else if (Op.isImmModifier()) { 7488 OptionalIdx[Op.getImmTy()] = I; 7489 } else if (!Op.isToken()) { 7490 llvm_unreachable("unexpected operand type"); 7491 } 7492 } 7493 7494 bool IsGFX10Plus = isGFX10Plus(); 7495 7496 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7497 if (IsGFX10Plus) 7498 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7499 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7500 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7501 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7502 if (IsGFX10Plus) 7503 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7504 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7507 if (!IsGFX10Plus) 7508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7509 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7510 } 7511 7512 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7513 cvtMIMG(Inst, Operands, true); 7514 } 7515 7516 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7517 OptionalImmIndexMap OptionalIdx; 7518 bool IsAtomicReturn = false; 7519 7520 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7522 if (!Op.isCPol()) 7523 continue; 7524 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7525 break; 7526 } 7527 7528 if (!IsAtomicReturn) { 7529 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7530 if (NewOpc != -1) 7531 Inst.setOpcode(NewOpc); 7532 } 7533 7534 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7535 SIInstrFlags::IsAtomicRet; 7536 7537 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7538 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7539 7540 // Add the register arguments 7541 if (Op.isReg()) { 7542 Op.addRegOperands(Inst, 1); 7543 if (IsAtomicReturn && i == 1) 7544 Op.addRegOperands(Inst, 1); 7545 continue; 7546 } 7547 7548 // Handle the case where soffset is an immediate 7549 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7550 Op.addImmOperands(Inst, 1); 7551 continue; 7552 } 7553 7554 // Handle tokens like 'offen' which are sometimes hard-coded into the 7555 // asm string. There are no MCInst operands for these. 7556 if (Op.isToken()) { 7557 continue; 7558 } 7559 assert(Op.isImm()); 7560 7561 // Handle optional arguments 7562 OptionalIdx[Op.getImmTy()] = i; 7563 } 7564 7565 if ((int)Inst.getNumOperands() <= 7566 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7568 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7569 } 7570 7571 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7572 const OperandVector &Operands) { 7573 for (unsigned I = 1; I < Operands.size(); ++I) { 7574 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7575 if (Operand.isReg()) 7576 Operand.addRegOperands(Inst, 1); 7577 } 7578 7579 Inst.addOperand(MCOperand::createImm(1)); // a16 7580 } 7581 7582 //===----------------------------------------------------------------------===// 7583 // smrd 7584 //===----------------------------------------------------------------------===// 7585 7586 bool AMDGPUOperand::isSMRDOffset8() const { 7587 return isImm() && isUInt<8>(getImm()); 7588 } 7589 7590 bool AMDGPUOperand::isSMEMOffset() const { 7591 return isImm(); // Offset range is checked later by validator. 7592 } 7593 7594 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7595 // 32-bit literals are only supported on CI and we only want to use them 7596 // when the offset is > 8-bits. 
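  // For example, an offset of 0xff still fits the 8-bit SMRD form, while
  // 0x100 needs the CI 32-bit literal encoding (illustrative values only).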
7597 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7598 } 7599 7600 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7601 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7602 } 7603 7604 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7605 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7606 } 7607 7608 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7609 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7610 } 7611 7612 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7613 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7614 } 7615 7616 //===----------------------------------------------------------------------===// 7617 // vop3 7618 //===----------------------------------------------------------------------===// 7619 7620 static bool ConvertOmodMul(int64_t &Mul) { 7621 if (Mul != 1 && Mul != 2 && Mul != 4) 7622 return false; 7623 7624 Mul >>= 1; 7625 return true; 7626 } 7627 7628 static bool ConvertOmodDiv(int64_t &Div) { 7629 if (Div == 1) { 7630 Div = 0; 7631 return true; 7632 } 7633 7634 if (Div == 2) { 7635 Div = 3; 7636 return true; 7637 } 7638 7639 return false; 7640 } 7641 7642 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7643 // This is intentional and ensures compatibility with sp3. 7644 // See bug 35397 for details. 7645 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7646 if (BoundCtrl == 0 || BoundCtrl == 1) { 7647 BoundCtrl = 1; 7648 return true; 7649 } 7650 return false; 7651 } 7652 7653 // Note: the order in this table matches the order of operands in AsmString. 7654 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7655 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7656 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7657 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7658 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7659 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7660 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7661 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7662 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7663 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7664 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7665 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7666 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7667 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7668 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7669 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7670 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7671 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7672 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7673 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7674 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7675 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7676 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7677 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7678 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7679 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7680 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7681 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7682 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7683 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7684 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

void AMDGPUAsmParser::onBeginOfFile() {
  if (!getParser().getStreamer().getTargetStreamer() ||
      getSTI().getTargetTriple().getArch() == Triple::r600)
    return;

  if (!getTargetStreamer().getTargetID())
    getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());

  if (isHsaAbiVersion3AndAbove(&getSTI()))
    getTargetStreamer().EmitDirectiveAMDGCNTarget();
}

OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // The current design assumes that all operands after the first optional
  // operand are also optional. However, the implementation of some
  // instructions violates this rule (see e.g. flat/global atomics, which
  // have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure the autogenerated parser of custom operands never hits
  // hardcoded mandatory operands.
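  // As a sketch of the case described above: for a flat/global atomic whose
  // asm string ends in a hard-coded "glc", the loop below keeps (implicitly)
  // parsing optional operands so that the autogenerated matcher is not left
  // pointing at that mandatory token. (Illustrative; see the instruction
  // definitions for the exact operand lists.)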
7724 7725 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7726 if (res != MatchOperand_Success || 7727 isToken(AsmToken::EndOfStatement)) 7728 break; 7729 7730 trySkipToken(AsmToken::Comma); 7731 res = parseOptionalOpr(Operands); 7732 } 7733 7734 return res; 7735 } 7736 7737 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7738 OperandMatchResultTy res; 7739 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7740 // try to parse any optional operand here 7741 if (Op.IsBit) { 7742 res = parseNamedBit(Op.Name, Operands, Op.Type); 7743 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7744 res = parseOModOperand(Operands); 7745 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7746 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7747 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7748 res = parseSDWASel(Operands, Op.Name, Op.Type); 7749 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7750 res = parseSDWADstUnused(Operands); 7751 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7752 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7753 Op.Type == AMDGPUOperand::ImmTyNegLo || 7754 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7755 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7756 Op.ConvertResult); 7757 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7758 res = parseDim(Operands); 7759 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7760 res = parseCPol(Operands); 7761 } else { 7762 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7763 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7764 res = parseOperandArrayWithPrefix("neg", Operands, 7765 AMDGPUOperand::ImmTyBLGP, 7766 nullptr); 7767 } 7768 } 7769 if (res != MatchOperand_NoMatch) { 7770 return res; 7771 } 7772 } 7773 return MatchOperand_NoMatch; 7774 } 7775 7776 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7777 StringRef Name = getTokenStr(); 7778 if (Name == "mul") { 7779 return parseIntWithPrefix("mul", Operands, 7780 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7781 } 7782 7783 if (Name == "div") { 7784 return parseIntWithPrefix("div", Operands, 7785 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7786 } 7787 7788 return MatchOperand_NoMatch; 7789 } 7790 7791 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7792 cvtVOP3P(Inst, Operands); 7793 7794 int Opc = Inst.getOpcode(); 7795 7796 int SrcNum; 7797 const int Ops[] = { AMDGPU::OpName::src0, 7798 AMDGPU::OpName::src1, 7799 AMDGPU::OpName::src2 }; 7800 for (SrcNum = 0; 7801 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7802 ++SrcNum); 7803 assert(SrcNum > 0); 7804 7805 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7806 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7807 7808 if ((OpSel & (1 << SrcNum)) != 0) { 7809 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7810 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7811 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7812 } 7813 } 7814 7815 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7816 // 1. This operand is input modifiers 7817 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7818 // 2. This is not last operand 7819 && Desc.NumOperands > (OpNum + 1) 7820 // 3. Next operand is register class 7821 && Desc.OpInfo[OpNum + 1].RegClass != -1 7822 // 4. 
Next register is not tied to any other operand 7823 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7824 } 7825 7826 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7827 { 7828 OptionalImmIndexMap OptionalIdx; 7829 unsigned Opc = Inst.getOpcode(); 7830 7831 unsigned I = 1; 7832 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7833 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7834 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7835 } 7836 7837 for (unsigned E = Operands.size(); I != E; ++I) { 7838 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7839 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7840 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7841 } else if (Op.isInterpSlot() || 7842 Op.isInterpAttr() || 7843 Op.isAttrChan()) { 7844 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7845 } else if (Op.isImmModifier()) { 7846 OptionalIdx[Op.getImmTy()] = I; 7847 } else { 7848 llvm_unreachable("unhandled operand type"); 7849 } 7850 } 7851 7852 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7853 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7854 } 7855 7856 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7857 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7858 } 7859 7860 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7861 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7862 } 7863 } 7864 7865 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7866 OptionalImmIndexMap &OptionalIdx) { 7867 unsigned Opc = Inst.getOpcode(); 7868 7869 unsigned I = 1; 7870 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7871 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7872 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7873 } 7874 7875 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7876 // This instruction has src modifiers 7877 for (unsigned E = Operands.size(); I != E; ++I) { 7878 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7879 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7880 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7881 } else if (Op.isImmModifier()) { 7882 OptionalIdx[Op.getImmTy()] = I; 7883 } else if (Op.isRegOrImm()) { 7884 Op.addRegOrImmOperands(Inst, 1); 7885 } else { 7886 llvm_unreachable("unhandled operand type"); 7887 } 7888 } 7889 } else { 7890 // No src modifiers 7891 for (unsigned E = Operands.size(); I != E; ++I) { 7892 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7893 if (Op.isMod()) { 7894 OptionalIdx[Op.getImmTy()] = I; 7895 } else { 7896 Op.addRegOrImmOperands(Inst, 1); 7897 } 7898 } 7899 } 7900 7901 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7902 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7903 } 7904 7905 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7906 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7907 } 7908 7909 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7910 // it has src2 register operand that is tied to dst operand 7911 // we don't allow modifiers for this operand in assembler so src2_modifiers 7912 // should be 0. 
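  // Sketch of the fix-up below for, e.g., "v_mac_f32_e64 v0, v1, v2": a zero
  // src2_modifiers immediate is inserted and the dst operand (v0) is re-added
  // as the tied src2, so the MCInst layout matches the opcode definition.
  // (Illustrative example; the opcode list below is authoritative.)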
7913 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7914 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7915 Opc == AMDGPU::V_MAC_F32_e64_vi || 7916 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7917 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7918 Opc == AMDGPU::V_MAC_F16_e64_vi || 7919 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7920 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7921 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7922 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7923 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7924 auto it = Inst.begin(); 7925 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7926 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7927 ++it; 7928 // Copy the operand to ensure it's not invalidated when Inst grows. 7929 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7930 } 7931 } 7932 7933 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7934 OptionalImmIndexMap OptionalIdx; 7935 cvtVOP3(Inst, Operands, OptionalIdx); 7936 } 7937 7938 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7939 OptionalImmIndexMap &OptIdx) { 7940 const int Opc = Inst.getOpcode(); 7941 const MCInstrDesc &Desc = MII.get(Opc); 7942 7943 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7944 7945 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7946 assert(!IsPacked); 7947 Inst.addOperand(Inst.getOperand(0)); 7948 } 7949 7950 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7951 // instruction, and then figure out where to actually put the modifiers 7952 7953 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7954 if (OpSelIdx != -1) { 7955 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7956 } 7957 7958 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7959 if (OpSelHiIdx != -1) { 7960 int DefaultVal = IsPacked ? 
-1 : 0; 7961 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7962 DefaultVal); 7963 } 7964 7965 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7966 if (NegLoIdx != -1) { 7967 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7968 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7969 } 7970 7971 const int Ops[] = { AMDGPU::OpName::src0, 7972 AMDGPU::OpName::src1, 7973 AMDGPU::OpName::src2 }; 7974 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7975 AMDGPU::OpName::src1_modifiers, 7976 AMDGPU::OpName::src2_modifiers }; 7977 7978 unsigned OpSel = 0; 7979 unsigned OpSelHi = 0; 7980 unsigned NegLo = 0; 7981 unsigned NegHi = 0; 7982 7983 if (OpSelIdx != -1) 7984 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7985 7986 if (OpSelHiIdx != -1) 7987 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7988 7989 if (NegLoIdx != -1) { 7990 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7991 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7992 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7993 } 7994 7995 for (int J = 0; J < 3; ++J) { 7996 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7997 if (OpIdx == -1) 7998 break; 7999 8000 uint32_t ModVal = 0; 8001 8002 if ((OpSel & (1 << J)) != 0) 8003 ModVal |= SISrcMods::OP_SEL_0; 8004 8005 if ((OpSelHi & (1 << J)) != 0) 8006 ModVal |= SISrcMods::OP_SEL_1; 8007 8008 if ((NegLo & (1 << J)) != 0) 8009 ModVal |= SISrcMods::NEG; 8010 8011 if ((NegHi & (1 << J)) != 0) 8012 ModVal |= SISrcMods::NEG_HI; 8013 8014 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8015 8016 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8017 } 8018 } 8019 8020 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8021 OptionalImmIndexMap OptIdx; 8022 cvtVOP3(Inst, Operands, OptIdx); 8023 cvtVOP3P(Inst, Operands, OptIdx); 8024 } 8025 8026 //===----------------------------------------------------------------------===// 8027 // dpp 8028 //===----------------------------------------------------------------------===// 8029 8030 bool AMDGPUOperand::isDPP8() const { 8031 return isImmTy(ImmTyDPP8); 8032 } 8033 8034 bool AMDGPUOperand::isDPPCtrl() const { 8035 using namespace AMDGPU::DPP; 8036 8037 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8038 if (result) { 8039 int64_t Imm = getImm(); 8040 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8041 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8042 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8043 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8044 (Imm == DppCtrl::WAVE_SHL1) || 8045 (Imm == DppCtrl::WAVE_ROL1) || 8046 (Imm == DppCtrl::WAVE_SHR1) || 8047 (Imm == DppCtrl::WAVE_ROR1) || 8048 (Imm == DppCtrl::ROW_MIRROR) || 8049 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8050 (Imm == DppCtrl::BCAST15) || 8051 (Imm == DppCtrl::BCAST31) || 8052 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8053 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8054 } 8055 return false; 8056 } 8057 8058 //===----------------------------------------------------------------------===// 8059 // mAI 8060 //===----------------------------------------------------------------------===// 8061 8062 bool AMDGPUOperand::isBLGP() const { 8063 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8064 } 8065 8066 bool 
AMDGPUOperand::isCBSZ() const { 8067 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8068 } 8069 8070 bool AMDGPUOperand::isABID() const { 8071 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8072 } 8073 8074 bool AMDGPUOperand::isS16Imm() const { 8075 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8076 } 8077 8078 bool AMDGPUOperand::isU16Imm() const { 8079 return isImm() && isUInt<16>(getImm()); 8080 } 8081 8082 //===----------------------------------------------------------------------===// 8083 // dim 8084 //===----------------------------------------------------------------------===// 8085 8086 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8087 // We want to allow "dim:1D" etc., 8088 // but the initial 1 is tokenized as an integer. 8089 std::string Token; 8090 if (isToken(AsmToken::Integer)) { 8091 SMLoc Loc = getToken().getEndLoc(); 8092 Token = std::string(getTokenStr()); 8093 lex(); 8094 if (getLoc() != Loc) 8095 return false; 8096 } 8097 8098 StringRef Suffix; 8099 if (!parseId(Suffix)) 8100 return false; 8101 Token += Suffix; 8102 8103 StringRef DimId = Token; 8104 if (DimId.startswith("SQ_RSRC_IMG_")) 8105 DimId = DimId.drop_front(12); 8106 8107 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8108 if (!DimInfo) 8109 return false; 8110 8111 Encoding = DimInfo->Encoding; 8112 return true; 8113 } 8114 8115 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8116 if (!isGFX10Plus()) 8117 return MatchOperand_NoMatch; 8118 8119 SMLoc S = getLoc(); 8120 8121 if (!trySkipId("dim", AsmToken::Colon)) 8122 return MatchOperand_NoMatch; 8123 8124 unsigned Encoding; 8125 SMLoc Loc = getLoc(); 8126 if (!parseDimId(Encoding)) { 8127 Error(Loc, "invalid dim value"); 8128 return MatchOperand_ParseFail; 8129 } 8130 8131 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8132 AMDGPUOperand::ImmTyDim)); 8133 return MatchOperand_Success; 8134 } 8135 8136 //===----------------------------------------------------------------------===// 8137 // dpp 8138 //===----------------------------------------------------------------------===// 8139 8140 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8141 SMLoc S = getLoc(); 8142 8143 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8144 return MatchOperand_NoMatch; 8145 8146 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8147 8148 int64_t Sels[8]; 8149 8150 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8151 return MatchOperand_ParseFail; 8152 8153 for (size_t i = 0; i < 8; ++i) { 8154 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8155 return MatchOperand_ParseFail; 8156 8157 SMLoc Loc = getLoc(); 8158 if (getParser().parseAbsoluteExpression(Sels[i])) 8159 return MatchOperand_ParseFail; 8160 if (0 > Sels[i] || 7 < Sels[i]) { 8161 Error(Loc, "expected a 3-bit value"); 8162 return MatchOperand_ParseFail; 8163 } 8164 } 8165 8166 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8167 return MatchOperand_ParseFail; 8168 8169 unsigned DPP8 = 0; 8170 for (size_t i = 0; i < 8; ++i) 8171 DPP8 |= (Sels[i] << (i * 3)); 8172 8173 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8174 return MatchOperand_Success; 8175 } 8176 8177 bool 8178 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8179 const OperandVector &Operands) { 8180 if (Ctrl == "row_newbcast") 8181 return isGFX90A(); 8182 8183 if (Ctrl == "row_share" || 8184 Ctrl 
== "row_xmask") 8185 return isGFX10Plus(); 8186 8187 if (Ctrl == "wave_shl" || 8188 Ctrl == "wave_shr" || 8189 Ctrl == "wave_rol" || 8190 Ctrl == "wave_ror" || 8191 Ctrl == "row_bcast") 8192 return isVI() || isGFX9(); 8193 8194 return Ctrl == "row_mirror" || 8195 Ctrl == "row_half_mirror" || 8196 Ctrl == "quad_perm" || 8197 Ctrl == "row_shl" || 8198 Ctrl == "row_shr" || 8199 Ctrl == "row_ror"; 8200 } 8201 8202 int64_t 8203 AMDGPUAsmParser::parseDPPCtrlPerm() { 8204 // quad_perm:[%d,%d,%d,%d] 8205 8206 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8207 return -1; 8208 8209 int64_t Val = 0; 8210 for (int i = 0; i < 4; ++i) { 8211 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8212 return -1; 8213 8214 int64_t Temp; 8215 SMLoc Loc = getLoc(); 8216 if (getParser().parseAbsoluteExpression(Temp)) 8217 return -1; 8218 if (Temp < 0 || Temp > 3) { 8219 Error(Loc, "expected a 2-bit value"); 8220 return -1; 8221 } 8222 8223 Val += (Temp << i * 2); 8224 } 8225 8226 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8227 return -1; 8228 8229 return Val; 8230 } 8231 8232 int64_t 8233 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8234 using namespace AMDGPU::DPP; 8235 8236 // sel:%d 8237 8238 int64_t Val; 8239 SMLoc Loc = getLoc(); 8240 8241 if (getParser().parseAbsoluteExpression(Val)) 8242 return -1; 8243 8244 struct DppCtrlCheck { 8245 int64_t Ctrl; 8246 int Lo; 8247 int Hi; 8248 }; 8249 8250 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8251 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8252 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8253 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8254 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8255 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8256 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8257 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8258 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8259 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8260 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8261 .Default({-1, 0, 0}); 8262 8263 bool Valid; 8264 if (Check.Ctrl == -1) { 8265 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8266 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8267 } else { 8268 Valid = Check.Lo <= Val && Val <= Check.Hi; 8269 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8270 } 8271 8272 if (!Valid) { 8273 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8274 return -1; 8275 } 8276 8277 return Val; 8278 } 8279 8280 OperandMatchResultTy 8281 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8282 using namespace AMDGPU::DPP; 8283 8284 if (!isToken(AsmToken::Identifier) || 8285 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8286 return MatchOperand_NoMatch; 8287 8288 SMLoc S = getLoc(); 8289 int64_t Val = -1; 8290 StringRef Ctrl; 8291 8292 parseId(Ctrl); 8293 8294 if (Ctrl == "row_mirror") { 8295 Val = DppCtrl::ROW_MIRROR; 8296 } else if (Ctrl == "row_half_mirror") { 8297 Val = DppCtrl::ROW_HALF_MIRROR; 8298 } else { 8299 if (skipToken(AsmToken::Colon, "expected a colon")) { 8300 if (Ctrl == "quad_perm") { 8301 Val = parseDPPCtrlPerm(); 8302 } else { 8303 Val = parseDPPCtrlSel(Ctrl); 8304 } 8305 } 8306 } 8307 8308 if (Val == -1) 8309 return MatchOperand_ParseFail; 8310 8311 Operands.push_back( 8312 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8313 return MatchOperand_Success; 8314 } 8315 8316 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8317 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8318 } 8319 8320 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8321 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8322 } 8323 8324 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8325 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8326 } 8327 8328 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8329 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8330 } 8331 8332 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8333 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8334 } 8335 8336 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8337 OptionalImmIndexMap OptionalIdx; 8338 8339 unsigned Opc = Inst.getOpcode(); 8340 bool HasModifiers = 8341 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8342 unsigned I = 1; 8343 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8344 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8345 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8346 } 8347 8348 int Fi = 0; 8349 for (unsigned E = Operands.size(); I != E; ++I) { 8350 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8351 MCOI::TIED_TO); 8352 if (TiedTo != -1) { 8353 assert((unsigned)TiedTo < Inst.getNumOperands()); 8354 // handle tied old or src2 for MAC instructions 8355 Inst.addOperand(Inst.getOperand(TiedTo)); 8356 } 8357 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8358 // Add the register arguments 8359 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8360 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8361 // Skip it. 
8362 continue; 8363 } 8364 8365 if (IsDPP8) { 8366 if (Op.isDPP8()) { 8367 Op.addImmOperands(Inst, 1); 8368 } else if (HasModifiers && 8369 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8370 Op.addRegWithFPInputModsOperands(Inst, 2); 8371 } else if (Op.isFI()) { 8372 Fi = Op.getImm(); 8373 } else if (Op.isReg()) { 8374 Op.addRegOperands(Inst, 1); 8375 } else { 8376 llvm_unreachable("Invalid operand type"); 8377 } 8378 } else { 8379 if (HasModifiers && 8380 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8381 Op.addRegWithFPInputModsOperands(Inst, 2); 8382 } else if (Op.isReg()) { 8383 Op.addRegOperands(Inst, 1); 8384 } else if (Op.isDPPCtrl()) { 8385 Op.addImmOperands(Inst, 1); 8386 } else if (Op.isImm()) { 8387 // Handle optional arguments 8388 OptionalIdx[Op.getImmTy()] = I; 8389 } else { 8390 llvm_unreachable("Invalid operand type"); 8391 } 8392 } 8393 } 8394 8395 if (IsDPP8) { 8396 using namespace llvm::AMDGPU::DPP; 8397 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8398 } else { 8399 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8400 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8401 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8402 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8403 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8404 } 8405 } 8406 } 8407 8408 //===----------------------------------------------------------------------===// 8409 // sdwa 8410 //===----------------------------------------------------------------------===// 8411 8412 OperandMatchResultTy 8413 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8414 AMDGPUOperand::ImmTy Type) { 8415 using namespace llvm::AMDGPU::SDWA; 8416 8417 SMLoc S = getLoc(); 8418 StringRef Value; 8419 OperandMatchResultTy res; 8420 8421 SMLoc StringLoc; 8422 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8423 if (res != MatchOperand_Success) { 8424 return res; 8425 } 8426 8427 int64_t Int; 8428 Int = StringSwitch<int64_t>(Value) 8429 .Case("BYTE_0", SdwaSel::BYTE_0) 8430 .Case("BYTE_1", SdwaSel::BYTE_1) 8431 .Case("BYTE_2", SdwaSel::BYTE_2) 8432 .Case("BYTE_3", SdwaSel::BYTE_3) 8433 .Case("WORD_0", SdwaSel::WORD_0) 8434 .Case("WORD_1", SdwaSel::WORD_1) 8435 .Case("DWORD", SdwaSel::DWORD) 8436 .Default(0xffffffff); 8437 8438 if (Int == 0xffffffff) { 8439 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8440 return MatchOperand_ParseFail; 8441 } 8442 8443 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8444 return MatchOperand_Success; 8445 } 8446 8447 OperandMatchResultTy 8448 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8449 using namespace llvm::AMDGPU::SDWA; 8450 8451 SMLoc S = getLoc(); 8452 StringRef Value; 8453 OperandMatchResultTy res; 8454 8455 SMLoc StringLoc; 8456 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8457 if (res != MatchOperand_Success) { 8458 return res; 8459 } 8460 8461 int64_t Int; 8462 Int = StringSwitch<int64_t>(Value) 8463 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8464 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8465 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8466 .Default(0xffffffff); 8467 8468 if (Int == 0xffffffff) { 8469 Error(StringLoc, "invalid dst_unused value"); 8470 return MatchOperand_ParseFail; 8471 } 8472 8473 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8474 return MatchOperand_Success; 8475 } 8476 8477 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8478 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8479 } 8480 8481 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8482 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8483 } 8484 8485 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8486 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8487 } 8488 8489 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8490 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8491 } 8492 8493 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8494 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8495 } 8496 8497 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8498 uint64_t BasicInstType, 8499 bool SkipDstVcc, 8500 bool SkipSrcVcc) { 8501 using namespace llvm::AMDGPU::SDWA; 8502 8503 OptionalImmIndexMap OptionalIdx; 8504 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8505 bool SkippedVcc = false; 8506 8507 unsigned I = 1; 8508 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8509 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8510 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8511 } 8512 8513 for (unsigned E = Operands.size(); I != E; ++I) { 8514 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8515 if (SkipVcc && !SkippedVcc && Op.isReg() && 8516 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8517 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8518 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8519 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8520 // Skip VCC only if we didn't skip it on previous iteration. 8521 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
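    // For example (mirroring the comment above): in
    //   v_add_i32_sdwa v1, vcc, v2, v3
    // the "vcc" written as the 2nd operand is not added to the MCInst here;
    // it is skipped once (never twice in a row), and the operand-count checks
    // below tell the dst-vcc and src-vcc positions apart.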
8522 if (BasicInstType == SIInstrFlags::VOP2 && 8523 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8524 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8525 SkippedVcc = true; 8526 continue; 8527 } else if (BasicInstType == SIInstrFlags::VOPC && 8528 Inst.getNumOperands() == 0) { 8529 SkippedVcc = true; 8530 continue; 8531 } 8532 } 8533 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8534 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8535 } else if (Op.isImm()) { 8536 // Handle optional arguments 8537 OptionalIdx[Op.getImmTy()] = I; 8538 } else { 8539 llvm_unreachable("Invalid operand type"); 8540 } 8541 SkippedVcc = false; 8542 } 8543 8544 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8545 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8546 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8547 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8548 switch (BasicInstType) { 8549 case SIInstrFlags::VOP1: 8550 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8551 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8552 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8553 } 8554 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8556 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8557 break; 8558 8559 case SIInstrFlags::VOP2: 8560 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8561 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8562 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8563 } 8564 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8565 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8566 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8568 break; 8569 8570 case SIInstrFlags::VOPC: 8571 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8575 break; 8576 8577 default: 8578 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8579 } 8580 } 8581 8582 // special case v_mac_{f16, f32}: 8583 // it has src2 register operand that is tied to dst operand 8584 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8585 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8586 auto it = Inst.begin(); 8587 std::advance( 8588 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8589 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8590 } 8591 } 8592 8593 //===----------------------------------------------------------------------===// 8594 // mAI 8595 //===----------------------------------------------------------------------===// 8596 8597 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8598 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8599 } 8600 8601 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8602 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8603 } 8604 8605 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8606 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8607 } 8608 8609 /// Force static initialization. 8610 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8611 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8612 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8613 } 8614 8615 #define GET_REGISTER_MATCHER 8616 #define GET_MATCHER_IMPLEMENTATION 8617 #define GET_MNEMONIC_SPELL_CHECKER 8618 #define GET_MNEMONIC_CHECKER 8619 #include "AMDGPUGenAsmMatcher.inc" 8620 8621 // This function should be defined after auto-generated include so that we have 8622 // MatchClassKind enum defined 8623 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8624 unsigned Kind) { 8625 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8626 // But MatchInstructionImpl() expects to meet token and fails to validate 8627 // operand. This method checks if we are given immediate operand but expect to 8628 // get corresponding token. 8629 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8630 switch (Kind) { 8631 case MCK_addr64: 8632 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8633 case MCK_gds: 8634 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8635 case MCK_lds: 8636 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8637 case MCK_idxen: 8638 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8639 case MCK_offen: 8640 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8641 case MCK_SSrcB32: 8642 // When operands have expression values, they will return true for isToken, 8643 // because it is not possible to distinguish between a token and an 8644 // expression at parse time. MatchInstructionImpl() will always try to 8645 // match an operand as a token, when isToken returns true, and when the 8646 // name of the expression is not a valid token, the match will fail, 8647 // so we need to handle it here. 8648 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8649 case MCK_SSrcF32: 8650 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8651 case MCK_SoppBrTarget: 8652 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8653 case MCK_VReg32OrOff: 8654 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8655 case MCK_InterpSlot: 8656 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
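// For reference (illustrative): "s_endpgm" with no operand lets parseEndpgmOp()
// default the immediate to 0, while "s_endpgm 3" supplies an explicit value,
// which must fit in 16 bits.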