1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/TargetParser.h" 40 41 using namespace llvm; 42 using namespace llvm::AMDGPU; 43 using namespace llvm::amdhsa; 44 45 namespace { 46 47 class AMDGPUAsmParser; 48 49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 50 51 //===----------------------------------------------------------------------===// 52 // Operand 53 //===----------------------------------------------------------------------===// 54 55 class AMDGPUOperand : public MCParsedAsmOperand { 56 enum KindTy { 57 Token, 58 Immediate, 59 Register, 60 Expression 61 } Kind; 62 63 SMLoc StartLoc, EndLoc; 64 const AMDGPUAsmParser *AsmParser; 65 66 public: 67 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 68 : Kind(Kind_), AsmParser(AsmParser_) {} 69 70 using Ptr = std::unique_ptr<AMDGPUOperand>; 71 72 struct Modifiers { 73 bool Abs = false; 74 bool Neg = false; 75 bool Sext = false; 76 77 bool hasFPModifiers() const { return Abs || Neg; } 78 bool hasIntModifiers() const { return Sext; } 79 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 80 81 int64_t getFPModifiersOperand() const { 82 int64_t Operand = 0; 83 Operand |= Abs ? SISrcMods::ABS : 0u; 84 Operand |= Neg ? SISrcMods::NEG : 0u; 85 return Operand; 86 } 87 88 int64_t getIntModifiersOperand() const { 89 int64_t Operand = 0; 90 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 91 return Operand; 92 } 93 94 int64_t getModifiersOperand() const { 95 assert(!(hasFPModifiers() && hasIntModifiers()) 96 && "fp and int modifiers should not be used simultaneously"); 97 if (hasFPModifiers()) { 98 return getFPModifiersOperand(); 99 } else if (hasIntModifiers()) { 100 return getIntModifiersOperand(); 101 } else { 102 return 0; 103 } 104 } 105 106 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 107 }; 108 109 enum ImmTy { 110 ImmTyNone, 111 ImmTyGDS, 112 ImmTyLDS, 113 ImmTyOffen, 114 ImmTyIdxen, 115 ImmTyAddr64, 116 ImmTyOffset, 117 ImmTyInstOffset, 118 ImmTyOffset0, 119 ImmTyOffset1, 120 ImmTyCPol, 121 ImmTySWZ, 122 ImmTyTFE, 123 ImmTyD16, 124 ImmTyClampSI, 125 ImmTyOModSI, 126 ImmTyDPP8, 127 ImmTyDppCtrl, 128 ImmTyDppRowMask, 129 ImmTyDppBankMask, 130 ImmTyDppBoundCtrl, 131 ImmTyDppFi, 132 ImmTySdwaDstSel, 133 ImmTySdwaSrc0Sel, 134 ImmTySdwaSrc1Sel, 135 ImmTySdwaDstUnused, 136 ImmTyDMask, 137 ImmTyDim, 138 ImmTyUNorm, 139 ImmTyDA, 140 ImmTyR128A16, 141 ImmTyA16, 142 ImmTyLWE, 143 ImmTyExpTgt, 144 ImmTyExpCompr, 145 ImmTyExpVM, 146 ImmTyFORMAT, 147 ImmTyHwreg, 148 ImmTyOff, 149 ImmTySendMsg, 150 ImmTyInterpSlot, 151 ImmTyInterpAttr, 152 ImmTyAttrChan, 153 ImmTyOpSel, 154 ImmTyOpSelHi, 155 ImmTyNegLo, 156 ImmTyNegHi, 157 ImmTySwizzle, 158 ImmTyGprIdxMode, 159 ImmTyHigh, 160 ImmTyBLGP, 161 ImmTyCBSZ, 162 ImmTyABID, 163 ImmTyEndpgm, 164 }; 165 166 enum ImmKindTy { 167 ImmKindTyNone, 168 ImmKindTyLiteral, 169 ImmKindTyConst, 170 }; 171 172 private: 173 struct TokOp { 174 const char *Data; 175 unsigned Length; 176 }; 177 178 struct ImmOp { 179 int64_t Val; 180 ImmTy Type; 181 bool IsFPImm; 182 mutable ImmKindTy Kind; 183 Modifiers Mods; 184 }; 185 186 struct RegOp { 187 unsigned RegNo; 188 Modifiers Mods; 189 }; 190 191 union { 192 TokOp Tok; 193 ImmOp Imm; 194 RegOp Reg; 195 const MCExpr *Expr; 196 }; 197 198 public: 199 bool isToken() const override { 200 if (Kind == Token) 201 return true; 202 203 // When parsing operands, we can't always tell if something was meant to be 204 // a token, like 'gds', or an expression that references a global variable. 205 // In this case, we assume the string is an expression, and if we need to 206 // interpret is a token, then we treat the symbol name as the token. 
207 return isSymbolRefExpr(); 208 } 209 210 bool isSymbolRefExpr() const { 211 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 212 } 213 214 bool isImm() const override { 215 return Kind == Immediate; 216 } 217 218 void setImmKindNone() const { 219 assert(isImm()); 220 Imm.Kind = ImmKindTyNone; 221 } 222 223 void setImmKindLiteral() const { 224 assert(isImm()); 225 Imm.Kind = ImmKindTyLiteral; 226 } 227 228 void setImmKindConst() const { 229 assert(isImm()); 230 Imm.Kind = ImmKindTyConst; 231 } 232 233 bool IsImmKindLiteral() const { 234 return isImm() && Imm.Kind == ImmKindTyLiteral; 235 } 236 237 bool isImmKindConst() const { 238 return isImm() && Imm.Kind == ImmKindTyConst; 239 } 240 241 bool isInlinableImm(MVT type) const; 242 bool isLiteralImm(MVT type) const; 243 244 bool isRegKind() const { 245 return Kind == Register; 246 } 247 248 bool isReg() const override { 249 return isRegKind() && !hasModifiers(); 250 } 251 252 bool isRegOrInline(unsigned RCID, MVT type) const { 253 return isRegClass(RCID) || isInlinableImm(type); 254 } 255 256 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 257 return isRegOrInline(RCID, type) || isLiteralImm(type); 258 } 259 260 bool isRegOrImmWithInt16InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 262 } 263 264 bool isRegOrImmWithInt32InputMods() const { 265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 266 } 267 268 bool isRegOrImmWithInt64InputMods() const { 269 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 270 } 271 272 bool isRegOrImmWithFP16InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 274 } 275 276 bool isRegOrImmWithFP32InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 278 } 279 280 bool isRegOrImmWithFP64InputMods() const { 281 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 282 } 283 284 bool isVReg() const { 285 return isRegClass(AMDGPU::VGPR_32RegClassID) || 286 isRegClass(AMDGPU::VReg_64RegClassID) || 287 isRegClass(AMDGPU::VReg_96RegClassID) || 288 isRegClass(AMDGPU::VReg_128RegClassID) || 289 isRegClass(AMDGPU::VReg_160RegClassID) || 290 isRegClass(AMDGPU::VReg_192RegClassID) || 291 isRegClass(AMDGPU::VReg_256RegClassID) || 292 isRegClass(AMDGPU::VReg_512RegClassID) || 293 isRegClass(AMDGPU::VReg_1024RegClassID); 294 } 295 296 bool isVReg32() const { 297 return isRegClass(AMDGPU::VGPR_32RegClassID); 298 } 299 300 bool isVReg32OrOff() const { 301 return isOff() || isVReg32(); 302 } 303 304 bool isNull() const { 305 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 306 } 307 308 bool isVRegWithInputMods() const; 309 310 bool isSDWAOperand(MVT type) const; 311 bool isSDWAFP16Operand() const; 312 bool isSDWAFP32Operand() const; 313 bool isSDWAInt16Operand() const; 314 bool isSDWAInt32Operand() const; 315 316 bool isImmTy(ImmTy ImmT) const { 317 return isImm() && Imm.Type == ImmT; 318 } 319 320 bool isImmModifier() const { 321 return isImm() && Imm.Type != ImmTyNone; 322 } 323 324 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 325 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 326 bool isDMask() const { return isImmTy(ImmTyDMask); } 327 bool isDim() const { return isImmTy(ImmTyDim); } 328 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 329 bool isDA() const { return isImmTy(ImmTyDA); } 330 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 331 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 332 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 333 bool isOff() const { return isImmTy(ImmTyOff); } 334 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 335 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 336 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 337 bool isOffen() const { return isImmTy(ImmTyOffen); } 338 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 339 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 340 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 341 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 342 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 343 344 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 345 bool isGDS() const { return isImmTy(ImmTyGDS); } 346 bool isLDS() const { return isImmTy(ImmTyLDS); } 347 bool isCPol() const { return isImmTy(ImmTyCPol); } 348 bool isSWZ() const { return isImmTy(ImmTySWZ); } 349 bool isTFE() const { return isImmTy(ImmTyTFE); } 350 bool isD16() const { return isImmTy(ImmTyD16); } 351 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 352 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 353 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 354 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 355 bool isFI() const { return isImmTy(ImmTyDppFi); } 356 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 357 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 358 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 359 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 360 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 361 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 362 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 363 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 364 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 365 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 366 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 367 bool isHigh() const { return isImmTy(ImmTyHigh); } 368 369 bool isMod() const { 370 return isClampSI() || isOModSI(); 371 } 372 373 bool isRegOrImm() const { 374 return isReg() || isImm(); 375 } 376 377 bool isRegClass(unsigned RCID) const; 378 379 bool isInlineValue() const; 380 381 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 382 return isRegOrInline(RCID, type) && !hasModifiers(); 383 } 384 385 bool isSCSrcB16() const { 386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 387 } 388 389 bool isSCSrcV2B16() const { 390 return isSCSrcB16(); 391 } 392 393 bool isSCSrcB32() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 395 } 396 397 bool isSCSrcB64() const { 398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 399 } 400 401 bool isBoolReg() const; 402 403 bool isSCSrcF16() const { 404 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 405 } 406 407 bool isSCSrcV2F16() const { 408 return isSCSrcF16(); 409 } 410 411 bool isSCSrcF32() const { 412 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 413 } 414 415 bool isSCSrcF64() const { 416 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 417 } 418 419 bool isSSrcB32() const { 420 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 421 } 422 423 bool isSSrcB16() const { 424 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 425 } 426 427 bool isSSrcV2B16() const { 428 llvm_unreachable("cannot happen"); 429 return isSSrcB16(); 430 } 431 432 bool isSSrcB64() const { 433 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 434 // See isVSrc64(). 435 return isSCSrcB64() || isLiteralImm(MVT::i64); 436 } 437 438 bool isSSrcF32() const { 439 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 440 } 441 442 bool isSSrcF64() const { 443 return isSCSrcB64() || isLiteralImm(MVT::f64); 444 } 445 446 bool isSSrcF16() const { 447 return isSCSrcB16() || isLiteralImm(MVT::f16); 448 } 449 450 bool isSSrcV2F16() const { 451 llvm_unreachable("cannot happen"); 452 return isSSrcF16(); 453 } 454 455 bool isSSrcV2FP32() const { 456 llvm_unreachable("cannot happen"); 457 return isSSrcF32(); 458 } 459 460 bool isSCSrcV2FP32() const { 461 llvm_unreachable("cannot happen"); 462 return isSCSrcF32(); 463 } 464 465 bool isSSrcV2INT32() const { 466 llvm_unreachable("cannot happen"); 467 return isSSrcB32(); 468 } 469 470 bool isSCSrcV2INT32() const { 471 llvm_unreachable("cannot happen"); 472 return isSCSrcB32(); 473 } 474 475 bool isSSrcOrLdsB32() const { 476 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 477 isLiteralImm(MVT::i32) || isExpr(); 478 } 479 480 bool isVCSrcB32() const { 481 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 482 } 483 484 bool isVCSrcB64() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 486 } 487 488 bool isVCSrcB16() const { 489 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 490 } 491 492 bool isVCSrcV2B16() const { 493 return isVCSrcB16(); 494 } 495 496 bool isVCSrcF32() const { 497 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 498 } 499 500 bool isVCSrcF64() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 502 } 503 504 bool isVCSrcF16() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 506 } 507 508 bool isVCSrcV2F16() const { 509 return isVCSrcF16(); 510 } 511 512 bool isVSrcB32() const { 513 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 514 } 515 516 bool isVSrcB64() const { 517 return isVCSrcF64() || isLiteralImm(MVT::i64); 518 } 519 520 bool isVSrcB16() const { 521 return isVCSrcB16() || isLiteralImm(MVT::i16); 522 } 523 524 bool isVSrcV2B16() const { 525 return isVSrcB16() || isLiteralImm(MVT::v2i16); 526 } 527 528 bool isVCSrcV2FP32() const { 529 return isVCSrcF64(); 530 } 531 532 bool isVSrcV2FP32() const { 533 return isVSrcF64() || isLiteralImm(MVT::v2f32); 534 } 535 536 bool isVCSrcV2INT32() const { 537 return isVCSrcB64(); 538 } 539 540 bool isVSrcV2INT32() const { 541 return isVSrcB64() || isLiteralImm(MVT::v2i32); 542 } 543 544 bool isVSrcF32() const { 545 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 546 } 547 548 bool isVSrcF64() const { 549 return isVCSrcF64() || isLiteralImm(MVT::f64); 550 } 551 552 bool isVSrcF16() const { 553 return isVCSrcF16() || isLiteralImm(MVT::f16); 554 } 555 556 bool isVSrcV2F16() const { 557 return isVSrcF16() || isLiteralImm(MVT::v2f16); 558 } 559 560 bool isVISrcB32() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 562 } 563 564 bool isVISrcB16() const { 565 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 566 } 567 568 bool isVISrcV2B16() const { 569 return isVISrcB16(); 570 } 571 572 bool isVISrcF32() const { 573 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 574 } 575 576 
bool isVISrcF16() const { 577 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 578 } 579 580 bool isVISrcV2F16() const { 581 return isVISrcF16() || isVISrcB32(); 582 } 583 584 bool isVISrc_64B64() const { 585 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 586 } 587 588 bool isVISrc_64F64() const { 589 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 590 } 591 592 bool isVISrc_64V2FP32() const { 593 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 594 } 595 596 bool isVISrc_64V2INT32() const { 597 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 598 } 599 600 bool isVISrc_256B64() const { 601 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 602 } 603 604 bool isVISrc_256F64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 606 } 607 608 bool isVISrc_128B16() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 610 } 611 612 bool isVISrc_128V2B16() const { 613 return isVISrc_128B16(); 614 } 615 616 bool isVISrc_128B32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_128F32() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 622 } 623 624 bool isVISrc_256V2FP32() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 626 } 627 628 bool isVISrc_256V2INT32() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 630 } 631 632 bool isVISrc_512B32() const { 633 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 634 } 635 636 bool isVISrc_512B16() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 638 } 639 640 bool isVISrc_512V2B16() const { 641 return isVISrc_512B16(); 642 } 643 644 bool isVISrc_512F32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_512F16() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 650 } 651 652 bool isVISrc_512V2F16() const { 653 return isVISrc_512F16() || isVISrc_512B32(); 654 } 655 656 bool isVISrc_1024B32() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 658 } 659 660 bool isVISrc_1024B16() const { 661 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 662 } 663 664 bool isVISrc_1024V2B16() const { 665 return isVISrc_1024B16(); 666 } 667 668 bool isVISrc_1024F32() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 670 } 671 672 bool isVISrc_1024F16() const { 673 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 674 } 675 676 bool isVISrc_1024V2F16() const { 677 return isVISrc_1024F16() || isVISrc_1024B32(); 678 } 679 680 bool isAISrcB32() const { 681 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 682 } 683 684 bool isAISrcB16() const { 685 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 686 } 687 688 bool isAISrcV2B16() const { 689 return isAISrcB16(); 690 } 691 692 bool isAISrcF32() const { 693 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 694 } 695 696 bool isAISrcF16() const { 697 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 698 } 699 700 bool isAISrcV2F16() const { 701 return isAISrcF16() || isAISrcB32(); 702 } 703 704 bool isAISrc_64B64() const { 705 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 706 } 707 708 bool isAISrc_64F64() const { 709 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 710 } 711 712 bool isAISrc_128B32() const { 713 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 714 } 715 716 bool isAISrc_128B16() const { 717 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 718 } 719 720 bool isAISrc_128V2B16() const { 721 return isAISrc_128B16(); 722 } 723 724 bool isAISrc_128F32() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 726 } 727 728 bool isAISrc_128F16() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 730 } 731 732 bool isAISrc_128V2F16() const { 733 return isAISrc_128F16() || isAISrc_128B32(); 734 } 735 736 bool isVISrc_128F16() const { 737 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 738 } 739 740 bool isVISrc_128V2F16() const { 741 return isVISrc_128F16() || isVISrc_128B32(); 742 } 743 744 bool isAISrc_256B64() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 746 } 747 748 bool isAISrc_256F64() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 750 } 751 752 bool isAISrc_512B32() const { 753 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 754 } 755 756 bool isAISrc_512B16() const { 757 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 758 } 759 760 bool isAISrc_512V2B16() const { 761 return isAISrc_512B16(); 762 } 763 764 bool isAISrc_512F32() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 766 } 767 768 bool isAISrc_512F16() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 770 } 771 772 bool isAISrc_512V2F16() const { 773 return isAISrc_512F16() || isAISrc_512B32(); 774 } 775 776 bool isAISrc_1024B32() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 778 } 779 780 bool isAISrc_1024B16() const { 781 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 782 } 783 784 bool isAISrc_1024V2B16() const { 785 return isAISrc_1024B16(); 786 } 787 788 bool isAISrc_1024F32() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 790 } 791 792 bool isAISrc_1024F16() const { 793 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 794 } 795 796 bool isAISrc_1024V2F16() const { 797 return isAISrc_1024F16() || isAISrc_1024B32(); 798 } 799 800 bool isKImmFP32() const { 801 return isLiteralImm(MVT::f32); 802 } 803 804 bool isKImmFP16() const { 805 return isLiteralImm(MVT::f16); 806 } 807 808 bool isMem() const override { 809 return false; 810 } 811 812 bool isExpr() const { 813 return Kind == Expression; 814 } 815 816 bool isSoppBrTarget() const { 817 return isExpr() || isImm(); 818 } 819 820 bool isSWaitCnt() const; 821 bool isDepCtr() const; 822 bool isSDelayAlu() const; 823 bool isHwreg() const; 824 bool isSendMsg() const; 825 bool isSwizzle() const; 826 bool isSMRDOffset8() const; 827 bool isSMEMOffset() const; 828 bool isSMRDLiteralOffset() const; 829 bool isDPP8() const; 830 bool isDPPCtrl() const; 831 bool isBLGP() const; 832 bool isCBSZ() const; 833 bool isABID() const; 834 bool isGPRIdxMode() const; 835 bool isS16Imm() const; 836 bool isU16Imm() const; 837 bool isEndpgm() const; 838 839 StringRef getExpressionAsToken() const { 840 assert(isExpr()); 841 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 842 return S->getSymbol().getName(); 843 } 844 845 StringRef getToken() const { 846 assert(isToken()); 847 848 if (Kind == Expression) 849 return 
getExpressionAsToken(); 850 851 return StringRef(Tok.Data, Tok.Length); 852 } 853 854 int64_t getImm() const { 855 assert(isImm()); 856 return Imm.Val; 857 } 858 859 void setImm(int64_t Val) { 860 assert(isImm()); 861 Imm.Val = Val; 862 } 863 864 ImmTy getImmTy() const { 865 assert(isImm()); 866 return Imm.Type; 867 } 868 869 unsigned getReg() const override { 870 assert(isRegKind()); 871 return Reg.RegNo; 872 } 873 874 SMLoc getStartLoc() const override { 875 return StartLoc; 876 } 877 878 SMLoc getEndLoc() const override { 879 return EndLoc; 880 } 881 882 SMRange getLocRange() const { 883 return SMRange(StartLoc, EndLoc); 884 } 885 886 Modifiers getModifiers() const { 887 assert(isRegKind() || isImmTy(ImmTyNone)); 888 return isRegKind() ? Reg.Mods : Imm.Mods; 889 } 890 891 void setModifiers(Modifiers Mods) { 892 assert(isRegKind() || isImmTy(ImmTyNone)); 893 if (isRegKind()) 894 Reg.Mods = Mods; 895 else 896 Imm.Mods = Mods; 897 } 898 899 bool hasModifiers() const { 900 return getModifiers().hasModifiers(); 901 } 902 903 bool hasFPModifiers() const { 904 return getModifiers().hasFPModifiers(); 905 } 906 907 bool hasIntModifiers() const { 908 return getModifiers().hasIntModifiers(); 909 } 910 911 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 912 913 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 914 915 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 916 917 template <unsigned Bitwidth> 918 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 919 920 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 921 addKImmFPOperands<16>(Inst, N); 922 } 923 924 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 925 addKImmFPOperands<32>(Inst, N); 926 } 927 928 void addRegOperands(MCInst &Inst, unsigned N) const; 929 930 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 931 addRegOperands(Inst, N); 932 } 933 934 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 935 if (isRegKind()) 936 addRegOperands(Inst, N); 937 else if (isExpr()) 938 Inst.addOperand(MCOperand::createExpr(Expr)); 939 else 940 addImmOperands(Inst, N); 941 } 942 943 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 944 Modifiers Mods = getModifiers(); 945 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 946 if (isRegKind()) { 947 addRegOperands(Inst, N); 948 } else { 949 addImmOperands(Inst, N, false); 950 } 951 } 952 953 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 954 assert(!hasIntModifiers()); 955 addRegOrImmWithInputModsOperands(Inst, N); 956 } 957 958 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 959 assert(!hasFPModifiers()); 960 addRegOrImmWithInputModsOperands(Inst, N); 961 } 962 963 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 964 Modifiers Mods = getModifiers(); 965 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 966 assert(isRegKind()); 967 addRegOperands(Inst, N); 968 } 969 970 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 971 assert(!hasIntModifiers()); 972 addRegWithInputModsOperands(Inst, N); 973 } 974 975 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasFPModifiers()); 977 addRegWithInputModsOperands(Inst, N); 978 } 979 980 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 981 if (isImm()) 982 addImmOperands(Inst, N); 983 else { 984 assert(isExpr()); 985 
Inst.addOperand(MCOperand::createExpr(Expr)); 986 } 987 } 988 989 static void printImmTy(raw_ostream& OS, ImmTy Type) { 990 switch (Type) { 991 case ImmTyNone: OS << "None"; break; 992 case ImmTyGDS: OS << "GDS"; break; 993 case ImmTyLDS: OS << "LDS"; break; 994 case ImmTyOffen: OS << "Offen"; break; 995 case ImmTyIdxen: OS << "Idxen"; break; 996 case ImmTyAddr64: OS << "Addr64"; break; 997 case ImmTyOffset: OS << "Offset"; break; 998 case ImmTyInstOffset: OS << "InstOffset"; break; 999 case ImmTyOffset0: OS << "Offset0"; break; 1000 case ImmTyOffset1: OS << "Offset1"; break; 1001 case ImmTyCPol: OS << "CPol"; break; 1002 case ImmTySWZ: OS << "SWZ"; break; 1003 case ImmTyTFE: OS << "TFE"; break; 1004 case ImmTyD16: OS << "D16"; break; 1005 case ImmTyFORMAT: OS << "FORMAT"; break; 1006 case ImmTyClampSI: OS << "ClampSI"; break; 1007 case ImmTyOModSI: OS << "OModSI"; break; 1008 case ImmTyDPP8: OS << "DPP8"; break; 1009 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1010 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1011 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1012 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1013 case ImmTyDppFi: OS << "FI"; break; 1014 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1015 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1016 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1017 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1018 case ImmTyDMask: OS << "DMask"; break; 1019 case ImmTyDim: OS << "Dim"; break; 1020 case ImmTyUNorm: OS << "UNorm"; break; 1021 case ImmTyDA: OS << "DA"; break; 1022 case ImmTyR128A16: OS << "R128A16"; break; 1023 case ImmTyA16: OS << "A16"; break; 1024 case ImmTyLWE: OS << "LWE"; break; 1025 case ImmTyOff: OS << "Off"; break; 1026 case ImmTyExpTgt: OS << "ExpTgt"; break; 1027 case ImmTyExpCompr: OS << "ExpCompr"; break; 1028 case ImmTyExpVM: OS << "ExpVM"; break; 1029 case ImmTyHwreg: OS << "Hwreg"; break; 1030 case ImmTySendMsg: OS << "SendMsg"; break; 1031 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1032 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1033 case ImmTyAttrChan: OS << "AttrChan"; break; 1034 case ImmTyOpSel: OS << "OpSel"; break; 1035 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1036 case ImmTyNegLo: OS << "NegLo"; break; 1037 case ImmTyNegHi: OS << "NegHi"; break; 1038 case ImmTySwizzle: OS << "Swizzle"; break; 1039 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1040 case ImmTyHigh: OS << "High"; break; 1041 case ImmTyBLGP: OS << "BLGP"; break; 1042 case ImmTyCBSZ: OS << "CBSZ"; break; 1043 case ImmTyABID: OS << "ABID"; break; 1044 case ImmTyEndpgm: OS << "Endpgm"; break; 1045 } 1046 } 1047 1048 void print(raw_ostream &OS) const override { 1049 switch (Kind) { 1050 case Register: 1051 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1052 break; 1053 case Immediate: 1054 OS << '<' << getImm(); 1055 if (getImmTy() != ImmTyNone) { 1056 OS << " type: "; printImmTy(OS, getImmTy()); 1057 } 1058 OS << " mods: " << Imm.Mods << '>'; 1059 break; 1060 case Token: 1061 OS << '\'' << getToken() << '\''; 1062 break; 1063 case Expression: 1064 OS << "<expr " << *Expr << '>'; 1065 break; 1066 } 1067 } 1068 1069 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1070 int64_t Val, SMLoc Loc, 1071 ImmTy Type = ImmTyNone, 1072 bool IsFPImm = false) { 1073 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1074 Op->Imm.Val = Val; 1075 Op->Imm.IsFPImm = IsFPImm; 1076 Op->Imm.Kind = ImmKindTyNone; 1077 Op->Imm.Type = Type; 1078 Op->Imm.Mods = 
Modifiers(); 1079 Op->StartLoc = Loc; 1080 Op->EndLoc = Loc; 1081 return Op; 1082 } 1083 1084 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1085 StringRef Str, SMLoc Loc, 1086 bool HasExplicitEncodingSize = true) { 1087 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1088 Res->Tok.Data = Str.data(); 1089 Res->Tok.Length = Str.size(); 1090 Res->StartLoc = Loc; 1091 Res->EndLoc = Loc; 1092 return Res; 1093 } 1094 1095 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1096 unsigned RegNo, SMLoc S, 1097 SMLoc E) { 1098 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1099 Op->Reg.RegNo = RegNo; 1100 Op->Reg.Mods = Modifiers(); 1101 Op->StartLoc = S; 1102 Op->EndLoc = E; 1103 return Op; 1104 } 1105 1106 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1107 const class MCExpr *Expr, SMLoc S) { 1108 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1109 Op->Expr = Expr; 1110 Op->StartLoc = S; 1111 Op->EndLoc = S; 1112 return Op; 1113 } 1114 }; 1115 1116 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1117 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1118 return OS; 1119 } 1120 1121 //===----------------------------------------------------------------------===// 1122 // AsmParser 1123 //===----------------------------------------------------------------------===// 1124 1125 // Holds info related to the current kernel, e.g. count of SGPRs used. 1126 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1127 // .amdgpu_hsa_kernel or at EOF. 1128 class KernelScopeInfo { 1129 int SgprIndexUnusedMin = -1; 1130 int VgprIndexUnusedMin = -1; 1131 int AgprIndexUnusedMin = -1; 1132 MCContext *Ctx = nullptr; 1133 MCSubtargetInfo const *MSTI = nullptr; 1134 1135 void usesSgprAt(int i) { 1136 if (i >= SgprIndexUnusedMin) { 1137 SgprIndexUnusedMin = ++i; 1138 if (Ctx) { 1139 MCSymbol* const Sym = 1140 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1141 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1142 } 1143 } 1144 } 1145 1146 void usesVgprAt(int i) { 1147 if (i >= VgprIndexUnusedMin) { 1148 VgprIndexUnusedMin = ++i; 1149 if (Ctx) { 1150 MCSymbol* const Sym = 1151 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1152 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1153 VgprIndexUnusedMin); 1154 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1155 } 1156 } 1157 } 1158 1159 void usesAgprAt(int i) { 1160 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1161 if (!hasMAIInsts(*MSTI)) 1162 return; 1163 1164 if (i >= AgprIndexUnusedMin) { 1165 AgprIndexUnusedMin = ++i; 1166 if (Ctx) { 1167 MCSymbol* const Sym = 1168 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1169 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1170 1171 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1172 MCSymbol* const vSym = 1173 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1174 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1175 VgprIndexUnusedMin); 1176 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1177 } 1178 } 1179 } 1180 1181 public: 1182 KernelScopeInfo() = default; 1183 1184 void initialize(MCContext &Context) { 1185 Ctx = &Context; 1186 MSTI = Ctx->getSubtargetInfo(); 1187 1188 usesSgprAt(SgprIndexUnusedMin = -1); 1189 usesVgprAt(VgprIndexUnusedMin = -1); 
1190 if (hasMAIInsts(*MSTI)) { 1191 usesAgprAt(AgprIndexUnusedMin = -1); 1192 } 1193 } 1194 1195 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1196 unsigned RegWidth) { 1197 switch (RegKind) { 1198 case IS_SGPR: 1199 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1200 break; 1201 case IS_AGPR: 1202 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1203 break; 1204 case IS_VGPR: 1205 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1206 break; 1207 default: 1208 break; 1209 } 1210 } 1211 }; 1212 1213 class AMDGPUAsmParser : public MCTargetAsmParser { 1214 MCAsmParser &Parser; 1215 1216 // Number of extra operands parsed after the first optional operand. 1217 // This may be necessary to skip hardcoded mandatory operands. 1218 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1219 1220 unsigned ForcedEncodingSize = 0; 1221 bool ForcedDPP = false; 1222 bool ForcedSDWA = false; 1223 KernelScopeInfo KernelScope; 1224 unsigned CPolSeen; 1225 1226 /// @name Auto-generated Match Functions 1227 /// { 1228 1229 #define GET_ASSEMBLER_HEADER 1230 #include "AMDGPUGenAsmMatcher.inc" 1231 1232 /// } 1233 1234 private: 1235 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1236 bool OutOfRangeError(SMRange Range); 1237 /// Calculate VGPR/SGPR blocks required for given target, reserved 1238 /// registers, and user-specified NextFreeXGPR values. 1239 /// 1240 /// \param Features [in] Target features, used for bug corrections. 1241 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1242 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1243 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1244 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1245 /// descriptor field, if valid. 1246 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1247 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1248 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1249 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1250 /// \param VGPRBlocks [out] Result VGPR block count. 1251 /// \param SGPRBlocks [out] Result SGPR block count. 1252 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1253 bool FlatScrUsed, bool XNACKUsed, 1254 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1255 SMRange VGPRRange, unsigned NextFreeSGPR, 1256 SMRange SGPRRange, unsigned &VGPRBlocks, 1257 unsigned &SGPRBlocks); 1258 bool ParseDirectiveAMDGCNTarget(); 1259 bool ParseDirectiveAMDHSAKernel(); 1260 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1261 bool ParseDirectiveHSACodeObjectVersion(); 1262 bool ParseDirectiveHSACodeObjectISA(); 1263 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1264 bool ParseDirectiveAMDKernelCodeT(); 1265 // TODO: Possibly make subtargetHasRegister const. 1266 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1267 bool ParseDirectiveAMDGPUHsaKernel(); 1268 1269 bool ParseDirectiveISAVersion(); 1270 bool ParseDirectiveHSAMetadata(); 1271 bool ParseDirectivePALMetadataBegin(); 1272 bool ParseDirectivePALMetadata(); 1273 bool ParseDirectiveAMDGPULDS(); 1274 1275 /// Common code to parse out a block of text (typically YAML) between start and 1276 /// end directives. 
1277 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1278 const char *AssemblerDirectiveEnd, 1279 std::string &CollectString); 1280 1281 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1282 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1283 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1284 unsigned &RegNum, unsigned &RegWidth, 1285 bool RestoreOnFailure = false); 1286 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1287 unsigned &RegNum, unsigned &RegWidth, 1288 SmallVectorImpl<AsmToken> &Tokens); 1289 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1290 unsigned &RegWidth, 1291 SmallVectorImpl<AsmToken> &Tokens); 1292 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1293 unsigned &RegWidth, 1294 SmallVectorImpl<AsmToken> &Tokens); 1295 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1296 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1297 bool ParseRegRange(unsigned& Num, unsigned& Width); 1298 unsigned getRegularReg(RegisterKind RegKind, 1299 unsigned RegNum, 1300 unsigned RegWidth, 1301 SMLoc Loc); 1302 1303 bool isRegister(); 1304 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1305 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1306 void initializeGprCountSymbol(RegisterKind RegKind); 1307 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1308 unsigned RegWidth); 1309 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1310 bool IsAtomic, bool IsLds = false); 1311 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1312 bool IsGdsHardcoded); 1313 1314 public: 1315 enum AMDGPUMatchResultTy { 1316 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1317 }; 1318 enum OperandMode { 1319 OperandMode_Default, 1320 OperandMode_NSA, 1321 }; 1322 1323 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1324 1325 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1326 const MCInstrInfo &MII, 1327 const MCTargetOptions &Options) 1328 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1329 MCAsmParserExtension::Initialize(Parser); 1330 1331 if (getFeatureBits().none()) { 1332 // Set default features. 1333 copySTI().ToggleFeature("southern-islands"); 1334 } 1335 1336 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1337 1338 { 1339 // TODO: make those pre-defined variables read-only. 1340 // Currently there is none suitable machinery in the core llvm-mc for this. 1341 // MCSymbol::isRedefinable is intended for another purpose, and 1342 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
1343 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1344 MCContext &Ctx = getContext(); 1345 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1346 MCSymbol *Sym = 1347 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1348 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1349 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1350 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1351 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1352 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1353 } else { 1354 MCSymbol *Sym = 1355 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1356 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1357 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1358 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1359 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1360 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1361 } 1362 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1363 initializeGprCountSymbol(IS_VGPR); 1364 initializeGprCountSymbol(IS_SGPR); 1365 } else 1366 KernelScope.initialize(getContext()); 1367 } 1368 } 1369 1370 bool hasMIMG_R128() const { 1371 return AMDGPU::hasMIMG_R128(getSTI()); 1372 } 1373 1374 bool hasPackedD16() const { 1375 return AMDGPU::hasPackedD16(getSTI()); 1376 } 1377 1378 bool hasGFX10A16() const { 1379 return AMDGPU::hasGFX10A16(getSTI()); 1380 } 1381 1382 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1383 1384 bool isSI() const { 1385 return AMDGPU::isSI(getSTI()); 1386 } 1387 1388 bool isCI() const { 1389 return AMDGPU::isCI(getSTI()); 1390 } 1391 1392 bool isVI() const { 1393 return AMDGPU::isVI(getSTI()); 1394 } 1395 1396 bool isGFX9() const { 1397 return AMDGPU::isGFX9(getSTI()); 1398 } 1399 1400 // TODO: isGFX90A is also true for GFX940. We need to clean it. 
1401 bool isGFX90A() const { 1402 return AMDGPU::isGFX90A(getSTI()); 1403 } 1404 1405 bool isGFX940() const { 1406 return AMDGPU::isGFX940(getSTI()); 1407 } 1408 1409 bool isGFX9Plus() const { 1410 return AMDGPU::isGFX9Plus(getSTI()); 1411 } 1412 1413 bool isGFX10() const { 1414 return AMDGPU::isGFX10(getSTI()); 1415 } 1416 1417 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1418 1419 bool isGFX11() const { 1420 return AMDGPU::isGFX11(getSTI()); 1421 } 1422 1423 bool isGFX11Plus() const { 1424 return AMDGPU::isGFX11Plus(getSTI()); 1425 } 1426 1427 bool isGFX10_BEncoding() const { 1428 return AMDGPU::isGFX10_BEncoding(getSTI()); 1429 } 1430 1431 bool hasInv2PiInlineImm() const { 1432 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1433 } 1434 1435 bool hasFlatOffsets() const { 1436 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1437 } 1438 1439 bool hasArchitectedFlatScratch() const { 1440 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1441 } 1442 1443 bool hasSGPR102_SGPR103() const { 1444 return !isVI() && !isGFX9(); 1445 } 1446 1447 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1448 1449 bool hasIntClamp() const { 1450 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1451 } 1452 1453 AMDGPUTargetStreamer &getTargetStreamer() { 1454 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1455 return static_cast<AMDGPUTargetStreamer &>(TS); 1456 } 1457 1458 const MCRegisterInfo *getMRI() const { 1459 // We need this const_cast because for some reason getContext() is not const 1460 // in MCAsmParser. 1461 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1462 } 1463 1464 const MCInstrInfo *getMII() const { 1465 return &MII; 1466 } 1467 1468 const FeatureBitset &getFeatureBits() const { 1469 return getSTI().getFeatureBits(); 1470 } 1471 1472 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1473 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1474 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1475 1476 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1477 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1478 bool isForcedDPP() const { return ForcedDPP; } 1479 bool isForcedSDWA() const { return ForcedSDWA; } 1480 ArrayRef<unsigned> getMatchedVariants() const; 1481 StringRef getMatchedVariantName() const; 1482 1483 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1484 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1485 bool RestoreOnFailure); 1486 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1487 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1488 SMLoc &EndLoc) override; 1489 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1490 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1491 unsigned Kind) override; 1492 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1493 OperandVector &Operands, MCStreamer &Out, 1494 uint64_t &ErrorInfo, 1495 bool MatchingInlineAsm) override; 1496 bool ParseDirective(AsmToken DirectiveID) override; 1497 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1498 OperandMode Mode = OperandMode_Default); 1499 StringRef parseMnemonicSuffix(StringRef Name); 1500 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1501 SMLoc NameLoc, OperandVector &Operands) override; 1502 //bool 
ProcessInstruction(MCInst &Inst); 1503 1504 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1505 1506 OperandMatchResultTy 1507 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1508 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1509 bool (*ConvertResult)(int64_t &) = nullptr); 1510 1511 OperandMatchResultTy 1512 parseOperandArrayWithPrefix(const char *Prefix, 1513 OperandVector &Operands, 1514 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1515 bool (*ConvertResult)(int64_t&) = nullptr); 1516 1517 OperandMatchResultTy 1518 parseNamedBit(StringRef Name, OperandVector &Operands, 1519 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1520 OperandMatchResultTy parseCPol(OperandVector &Operands); 1521 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1522 StringRef &Value, 1523 SMLoc &StringLoc); 1524 1525 bool isModifier(); 1526 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1527 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1528 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1529 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1530 bool parseSP3NegModifier(); 1531 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1532 OperandMatchResultTy parseReg(OperandVector &Operands); 1533 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1534 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1535 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1536 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1537 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1538 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1539 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1540 OperandMatchResultTy parseUfmt(int64_t &Format); 1541 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1542 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1543 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1544 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1545 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1546 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1547 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1548 1549 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1550 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1551 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1552 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1553 1554 bool parseCnt(int64_t &IntVal); 1555 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1556 1557 bool parseDepCtr(int64_t &IntVal, unsigned &Mask); 1558 void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName); 1559 OperandMatchResultTy parseDepCtrOps(OperandVector &Operands); 1560 1561 bool parseDelay(int64_t &Delay); 1562 OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands); 1563 1564 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1565 1566 private: 1567 struct OperandInfoTy { 1568 SMLoc Loc; 1569 int64_t Id; 
1570 bool IsSymbolic = false; 1571 bool IsDefined = false; 1572 1573 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1574 }; 1575 1576 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1577 bool validateSendMsg(const OperandInfoTy &Msg, 1578 const OperandInfoTy &Op, 1579 const OperandInfoTy &Stream); 1580 1581 bool parseHwregBody(OperandInfoTy &HwReg, 1582 OperandInfoTy &Offset, 1583 OperandInfoTy &Width); 1584 bool validateHwreg(const OperandInfoTy &HwReg, 1585 const OperandInfoTy &Offset, 1586 const OperandInfoTy &Width); 1587 1588 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1589 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1590 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1591 1592 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1593 const OperandVector &Operands) const; 1594 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1595 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1596 SMLoc getLitLoc(const OperandVector &Operands) const; 1597 SMLoc getConstLoc(const OperandVector &Operands) const; 1598 1599 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1600 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1601 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1602 bool validateSOPLiteral(const MCInst &Inst) const; 1603 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1604 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1605 bool validateIntClampSupported(const MCInst &Inst); 1606 bool validateMIMGAtomicDMask(const MCInst &Inst); 1607 bool validateMIMGGatherDMask(const MCInst &Inst); 1608 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1609 Optional<StringRef> validateMIMGDataSize(const MCInst &Inst); 1610 bool validateMIMGAddrSize(const MCInst &Inst); 1611 bool validateMIMGD16(const MCInst &Inst); 1612 bool validateMIMGDim(const MCInst &Inst); 1613 bool validateMIMGMSAA(const MCInst &Inst); 1614 bool validateOpSel(const MCInst &Inst); 1615 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1616 bool validateVccOperand(unsigned Reg) const; 1617 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1618 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1619 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1620 bool validateAGPRLdSt(const MCInst &Inst) const; 1621 bool validateVGPRAlign(const MCInst &Inst) const; 1622 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1623 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1624 bool validateDivScale(const MCInst &Inst); 1625 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1626 const SMLoc &IDLoc); 1627 bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands, 1628 const SMLoc &IDLoc); 1629 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1630 unsigned getConstantBusLimit(unsigned Opcode) const; 1631 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1632 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1633 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1634 1635 bool isSupportedMnemo(StringRef Mnemo, 1636 const FeatureBitset &FBS); 1637 bool isSupportedMnemo(StringRef Mnemo, 1638 
const FeatureBitset &FBS, 1639 ArrayRef<unsigned> Variants); 1640 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1641 1642 bool isId(const StringRef Id) const; 1643 bool isId(const AsmToken &Token, const StringRef Id) const; 1644 bool isToken(const AsmToken::TokenKind Kind) const; 1645 bool trySkipId(const StringRef Id); 1646 bool trySkipId(const StringRef Pref, const StringRef Id); 1647 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1648 bool trySkipToken(const AsmToken::TokenKind Kind); 1649 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1650 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1651 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1652 1653 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1654 AsmToken::TokenKind getTokenKind() const; 1655 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1656 bool parseExpr(OperandVector &Operands); 1657 StringRef getTokenStr() const; 1658 AsmToken peekToken(); 1659 AsmToken getToken() const; 1660 SMLoc getLoc() const; 1661 void lex(); 1662 1663 public: 1664 void onBeginOfFile() override; 1665 1666 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1667 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1668 1669 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1670 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1671 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1672 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1673 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1674 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1675 1676 bool parseSwizzleOperand(int64_t &Op, 1677 const unsigned MinVal, 1678 const unsigned MaxVal, 1679 const StringRef ErrMsg, 1680 SMLoc &Loc); 1681 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1682 const unsigned MinVal, 1683 const unsigned MaxVal, 1684 const StringRef ErrMsg); 1685 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1686 bool parseSwizzleOffset(int64_t &Imm); 1687 bool parseSwizzleMacro(int64_t &Imm); 1688 bool parseSwizzleQuadPerm(int64_t &Imm); 1689 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1690 bool parseSwizzleBroadcast(int64_t &Imm); 1691 bool parseSwizzleSwap(int64_t &Imm); 1692 bool parseSwizzleReverse(int64_t &Imm); 1693 1694 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1695 int64_t parseGPRIdxMacro(); 1696 1697 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1698 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1699 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1700 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1701 1702 AMDGPUOperand::Ptr defaultCPol() const; 1703 1704 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1705 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1706 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1707 AMDGPUOperand::Ptr defaultFlatOffset() const; 1708 1709 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1710 1711 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1712 OptionalImmIndexMap &OptionalIdx); 1713 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1714 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1715 void cvtVOP3P(MCInst &Inst, const OperandVector 
&Operands); 1716 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1717 OptionalImmIndexMap &OptionalIdx); 1718 1719 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1720 1721 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1722 bool IsAtomic = false); 1723 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1724 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1725 1726 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1727 1728 bool parseDimId(unsigned &Encoding); 1729 OperandMatchResultTy parseDim(OperandVector &Operands); 1730 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1731 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1732 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1733 int64_t parseDPPCtrlSel(StringRef Ctrl); 1734 int64_t parseDPPCtrlPerm(); 1735 AMDGPUOperand::Ptr defaultRowMask() const; 1736 AMDGPUOperand::Ptr defaultBankMask() const; 1737 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1738 AMDGPUOperand::Ptr defaultFI() const; 1739 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1740 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1741 1742 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1743 AMDGPUOperand::ImmTy Type); 1744 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1745 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1746 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1747 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1748 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1749 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1750 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1751 uint64_t BasicInstType, 1752 bool SkipDstVcc = false, 1753 bool SkipSrcVcc = false); 1754 1755 AMDGPUOperand::Ptr defaultBLGP() const; 1756 AMDGPUOperand::Ptr defaultCBSZ() const; 1757 AMDGPUOperand::Ptr defaultABID() const; 1758 1759 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1760 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1761 }; 1762 1763 struct OptionalOperand { 1764 const char *Name; 1765 AMDGPUOperand::ImmTy Type; 1766 bool IsBit; 1767 bool (*ConvertResult)(int64_t&); 1768 }; 1769 1770 } // end anonymous namespace 1771 1772 // May be called with integer type with equivalent bitwidth. 
1773 static const fltSemantics *getFltSemantics(unsigned Size) { 1774 switch (Size) { 1775 case 4: 1776 return &APFloat::IEEEsingle(); 1777 case 8: 1778 return &APFloat::IEEEdouble(); 1779 case 2: 1780 return &APFloat::IEEEhalf(); 1781 default: 1782 llvm_unreachable("unsupported fp type"); 1783 } 1784 } 1785 1786 static const fltSemantics *getFltSemantics(MVT VT) { 1787 return getFltSemantics(VT.getSizeInBits() / 8); 1788 } 1789 1790 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1791 switch (OperandType) { 1792 case AMDGPU::OPERAND_REG_IMM_INT32: 1793 case AMDGPU::OPERAND_REG_IMM_FP32: 1794 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1795 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1796 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1798 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1799 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1800 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1801 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1802 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1803 case AMDGPU::OPERAND_KIMM32: 1804 return &APFloat::IEEEsingle(); 1805 case AMDGPU::OPERAND_REG_IMM_INT64: 1806 case AMDGPU::OPERAND_REG_IMM_FP64: 1807 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1808 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1809 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1810 return &APFloat::IEEEdouble(); 1811 case AMDGPU::OPERAND_REG_IMM_INT16: 1812 case AMDGPU::OPERAND_REG_IMM_FP16: 1813 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1814 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1816 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1817 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1822 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1823 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1824 case AMDGPU::OPERAND_KIMM16: 1825 return &APFloat::IEEEhalf(); 1826 default: 1827 llvm_unreachable("unsupported fp type"); 1828 } 1829 } 1830 1831 //===----------------------------------------------------------------------===// 1832 // Operand 1833 //===----------------------------------------------------------------------===// 1834 1835 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1836 bool Lost; 1837 1838 // Convert the literal to the operand type's floating-point semantics 1839 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1840 APFloat::rmNearestTiesToEven, 1841 &Lost); 1842 // We allow precision loss but not overflow or underflow 1843 if (Status != APFloat::opOK && 1844 Lost && 1845 ((Status & APFloat::opOverflow) != 0 || 1846 (Status & APFloat::opUnderflow) != 0)) { 1847 return false; 1848 } 1849 1850 return true; 1851 } 1852 1853 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1854 return isUIntN(Size, Val) || isIntN(Size, Val); 1855 } 1856 1857 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1858 if (VT.getScalarType() == MVT::i16) { 1859 // FP immediate values are broken. 1860 return isInlinableIntLiteral(Val); 1861 } 1862 1863 // f16/v2f16 operands work correctly for all values. 1864 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1865 } 1866 1867 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1868 1869 // This is a hack to enable named inline values like 1870 // shared_base with both 32-bit and 64-bit operands. 1871 // Note that these values are defined as 1872 // 32-bit operands only.
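// For example, both "v_mov_b32 v0, src_shared_base" and
// "s_mov_b64 s[0:1], src_shared_base" should be accepted by the parser
// because of this early-out.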
1873 if (isInlineValue()) { 1874 return true; 1875 } 1876 1877 if (!isImmTy(ImmTyNone)) { 1878 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1879 return false; 1880 } 1881 // TODO: We should avoid using host float here. It would be better to 1882 // check the float bit values which is what a few other places do. 1883 // We've had bot failures before due to weird NaN support on mips hosts. 1884 1885 APInt Literal(64, Imm.Val); 1886 1887 if (Imm.IsFPImm) { // We got fp literal token 1888 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1889 return AMDGPU::isInlinableLiteral64(Imm.Val, 1890 AsmParser->hasInv2PiInlineImm()); 1891 } 1892 1893 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1894 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1895 return false; 1896 1897 if (type.getScalarSizeInBits() == 16) { 1898 return isInlineableLiteralOp16( 1899 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1900 type, AsmParser->hasInv2PiInlineImm()); 1901 } 1902 1903 // Check if single precision literal is inlinable 1904 return AMDGPU::isInlinableLiteral32( 1905 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1906 AsmParser->hasInv2PiInlineImm()); 1907 } 1908 1909 // We got int literal token. 1910 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1911 return AMDGPU::isInlinableLiteral64(Imm.Val, 1912 AsmParser->hasInv2PiInlineImm()); 1913 } 1914 1915 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1916 return false; 1917 } 1918 1919 if (type.getScalarSizeInBits() == 16) { 1920 return isInlineableLiteralOp16( 1921 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1922 type, AsmParser->hasInv2PiInlineImm()); 1923 } 1924 1925 return AMDGPU::isInlinableLiteral32( 1926 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1927 AsmParser->hasInv2PiInlineImm()); 1928 } 1929 1930 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1931 // Check that this immediate can be added as a literal 1932 if (!isImmTy(ImmTyNone)) { 1933 return false; 1934 } 1935 1936 if (!Imm.IsFPImm) { 1937 // We got int literal token. 1938 1939 if (type == MVT::f64 && hasFPModifiers()) { 1940 // Cannot apply fp modifiers to int literals while preserving the same semantics 1941 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1942 // disable these cases. 1943 return false; 1944 } 1945 1946 unsigned Size = type.getSizeInBits(); 1947 if (Size == 64) 1948 Size = 32; 1949 1950 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1951 // types. 1952 return isSafeTruncation(Imm.Val, Size); 1953 } 1954 1955 // We got fp literal token 1956 if (type == MVT::f64) { // Expected 64-bit fp operand 1957 // The low 32 bits of the literal would be set to zeroes, but we accept such literals 1958 return true; 1959 } 1960 1961 if (type == MVT::i64) { // Expected 64-bit int operand 1962 // We don't allow fp literals in 64-bit integer instructions. It is 1963 // unclear how we should encode them. 1964 return false; 1965 } 1966 1967 // We allow fp literals with f16x2 operands assuming that the specified 1968 // literal goes into the lower half and the upper half is zero. We also 1969 // require that the literal may be losslessly converted to f16. 1970 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1971 (type == MVT::v2i16)? MVT::i16 : 1972 (type == MVT::v2f32)?
MVT::f32 : type; 1973 1974 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1975 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1976 } 1977 1978 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1979 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1980 } 1981 1982 bool AMDGPUOperand::isVRegWithInputMods() const { 1983 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1984 // GFX90A allows DPP on 64-bit operands. 1985 (isRegClass(AMDGPU::VReg_64RegClassID) && 1986 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1987 } 1988 1989 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1990 if (AsmParser->isVI()) 1991 return isVReg32(); 1992 else if (AsmParser->isGFX9Plus()) 1993 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1994 else 1995 return false; 1996 } 1997 1998 bool AMDGPUOperand::isSDWAFP16Operand() const { 1999 return isSDWAOperand(MVT::f16); 2000 } 2001 2002 bool AMDGPUOperand::isSDWAFP32Operand() const { 2003 return isSDWAOperand(MVT::f32); 2004 } 2005 2006 bool AMDGPUOperand::isSDWAInt16Operand() const { 2007 return isSDWAOperand(MVT::i16); 2008 } 2009 2010 bool AMDGPUOperand::isSDWAInt32Operand() const { 2011 return isSDWAOperand(MVT::i32); 2012 } 2013 2014 bool AMDGPUOperand::isBoolReg() const { 2015 auto FB = AsmParser->getFeatureBits(); 2016 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2017 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2018 } 2019 2020 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2021 { 2022 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2023 assert(Size == 2 || Size == 4 || Size == 8); 2024 2025 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2026 2027 if (Imm.Mods.Abs) { 2028 Val &= ~FpSignMask; 2029 } 2030 if (Imm.Mods.Neg) { 2031 Val ^= FpSignMask; 2032 } 2033 2034 return Val; 2035 } 2036 2037 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2038 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2039 Inst.getNumOperands())) { 2040 addLiteralImmOperand(Inst, Imm.Val, 2041 ApplyModifiers & 2042 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2043 } else { 2044 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2045 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2046 setImmKindNone(); 2047 } 2048 } 2049 2050 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2051 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2052 auto OpNum = Inst.getNumOperands(); 2053 // Check that this operand accepts literals 2054 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2055 2056 if (ApplyModifiers) { 2057 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2058 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2059 Val = applyInputFPModifiers(Val, Size); 2060 } 2061 2062 APInt Literal(64, Val); 2063 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2064 2065 if (Imm.IsFPImm) { // We got fp literal token 2066 switch (OpTy) { 2067 case AMDGPU::OPERAND_REG_IMM_INT64: 2068 case AMDGPU::OPERAND_REG_IMM_FP64: 2069 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2070 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2071 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2072 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2073 AsmParser->hasInv2PiInlineImm())) { 2074 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2075 setImmKindConst(); 2076 return; 2077 } 2078 2079 // Non-inlineable 2080 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2081 // For fp operands we check if low 32 bits are zeros 2082 if (Literal.getLoBits(32) != 0) { 2083 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2084 "Can't encode literal as exact 64-bit floating-point operand. " 2085 "Low 32-bits will be set to zero"); 2086 } 2087 2088 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2089 setImmKindLiteral(); 2090 return; 2091 } 2092 2093 // We don't allow fp literals in 64-bit integer instructions. It is 2094 // unclear how we should encode them. This case should be checked earlier 2095 // in predicate methods (isLiteralImm()) 2096 llvm_unreachable("fp literal in 64-bit integer instruction."); 2097 2098 case AMDGPU::OPERAND_REG_IMM_INT32: 2099 case AMDGPU::OPERAND_REG_IMM_FP32: 2100 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2101 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2102 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2103 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2104 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2105 case AMDGPU::OPERAND_REG_IMM_INT16: 2106 case AMDGPU::OPERAND_REG_IMM_FP16: 2107 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2108 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2109 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2110 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2111 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2112 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2116 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2117 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2118 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2119 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2121 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2122 case AMDGPU::OPERAND_KIMM32: 2123 case AMDGPU::OPERAND_KIMM16: { 2124 bool lost; 2125 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2126 // Convert the literal to the operand type's floating-point semantics 2127 FPLiteral.convert(*getOpFltSemantics(OpTy), 2128 APFloat::rmNearestTiesToEven, &lost); 2129 // We allow precision loss but not overflow or underflow. This should have been 2130 // checked earlier in isLiteralImm() 2131 2132 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2133 Inst.addOperand(MCOperand::createImm(ImmVal)); 2134 setImmKindLiteral(); 2135 return; 2136 } 2137 default: 2138 llvm_unreachable("invalid operand size"); 2139 } 2140 2141 return; 2142 } 2143 2144 // We got int literal token. 2145 // Only sign extend inline immediates.
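// For example, with a 32-bit operand an integer token in the inline range
// (-16..64) such as "-1" should be encoded directly as an inline constant
// below, while a value like 0x12345678 falls through to the literal path
// and is emitted as a trailing 32-bit literal dword.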
2146 switch (OpTy) { 2147 case AMDGPU::OPERAND_REG_IMM_INT32: 2148 case AMDGPU::OPERAND_REG_IMM_FP32: 2149 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2150 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2151 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2152 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2153 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2154 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2155 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2156 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2158 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2159 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2160 if (isSafeTruncation(Val, 32) && 2161 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2162 AsmParser->hasInv2PiInlineImm())) { 2163 Inst.addOperand(MCOperand::createImm(Val)); 2164 setImmKindConst(); 2165 return; 2166 } 2167 2168 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2169 setImmKindLiteral(); 2170 return; 2171 2172 case AMDGPU::OPERAND_REG_IMM_INT64: 2173 case AMDGPU::OPERAND_REG_IMM_FP64: 2174 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2175 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2176 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2177 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2178 Inst.addOperand(MCOperand::createImm(Val)); 2179 setImmKindConst(); 2180 return; 2181 } 2182 2183 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2184 setImmKindLiteral(); 2185 return; 2186 2187 case AMDGPU::OPERAND_REG_IMM_INT16: 2188 case AMDGPU::OPERAND_REG_IMM_FP16: 2189 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2190 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2191 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2192 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2194 if (isSafeTruncation(Val, 16) && 2195 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2196 AsmParser->hasInv2PiInlineImm())) { 2197 Inst.addOperand(MCOperand::createImm(Val)); 2198 setImmKindConst(); 2199 return; 2200 } 2201 2202 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2203 setImmKindLiteral(); 2204 return; 2205 2206 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2207 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2208 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2209 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2210 assert(isSafeTruncation(Val, 16)); 2211 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2212 AsmParser->hasInv2PiInlineImm())); 2213 2214 Inst.addOperand(MCOperand::createImm(Val)); 2215 return; 2216 } 2217 case AMDGPU::OPERAND_KIMM32: 2218 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2219 setImmKindNone(); 2220 return; 2221 case AMDGPU::OPERAND_KIMM16: 2222 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2223 setImmKindNone(); 2224 return; 2225 default: 2226 llvm_unreachable("invalid operand size"); 2227 } 2228 } 2229 2230 template <unsigned Bitwidth> 2231 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2232 APInt Literal(64, Imm.Val); 2233 setImmKindNone(); 2234 2235 if (!Imm.IsFPImm) { 2236 // We got int literal token. 
2237 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2238 return; 2239 } 2240 2241 bool Lost; 2242 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2243 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2244 APFloat::rmNearestTiesToEven, &Lost); 2245 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2246 } 2247 2248 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2249 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2250 } 2251 2252 static bool isInlineValue(unsigned Reg) { 2253 switch (Reg) { 2254 case AMDGPU::SRC_SHARED_BASE: 2255 case AMDGPU::SRC_SHARED_LIMIT: 2256 case AMDGPU::SRC_PRIVATE_BASE: 2257 case AMDGPU::SRC_PRIVATE_LIMIT: 2258 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2259 return true; 2260 case AMDGPU::SRC_VCCZ: 2261 case AMDGPU::SRC_EXECZ: 2262 case AMDGPU::SRC_SCC: 2263 return true; 2264 case AMDGPU::SGPR_NULL: 2265 return true; 2266 default: 2267 return false; 2268 } 2269 } 2270 2271 bool AMDGPUOperand::isInlineValue() const { 2272 return isRegKind() && ::isInlineValue(getReg()); 2273 } 2274 2275 //===----------------------------------------------------------------------===// 2276 // AsmParser 2277 //===----------------------------------------------------------------------===// 2278 2279 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2280 if (Is == IS_VGPR) { 2281 switch (RegWidth) { 2282 default: return -1; 2283 case 32: 2284 return AMDGPU::VGPR_32RegClassID; 2285 case 64: 2286 return AMDGPU::VReg_64RegClassID; 2287 case 96: 2288 return AMDGPU::VReg_96RegClassID; 2289 case 128: 2290 return AMDGPU::VReg_128RegClassID; 2291 case 160: 2292 return AMDGPU::VReg_160RegClassID; 2293 case 192: 2294 return AMDGPU::VReg_192RegClassID; 2295 case 224: 2296 return AMDGPU::VReg_224RegClassID; 2297 case 256: 2298 return AMDGPU::VReg_256RegClassID; 2299 case 512: 2300 return AMDGPU::VReg_512RegClassID; 2301 case 1024: 2302 return AMDGPU::VReg_1024RegClassID; 2303 } 2304 } else if (Is == IS_TTMP) { 2305 switch (RegWidth) { 2306 default: return -1; 2307 case 32: 2308 return AMDGPU::TTMP_32RegClassID; 2309 case 64: 2310 return AMDGPU::TTMP_64RegClassID; 2311 case 128: 2312 return AMDGPU::TTMP_128RegClassID; 2313 case 256: 2314 return AMDGPU::TTMP_256RegClassID; 2315 case 512: 2316 return AMDGPU::TTMP_512RegClassID; 2317 } 2318 } else if (Is == IS_SGPR) { 2319 switch (RegWidth) { 2320 default: return -1; 2321 case 32: 2322 return AMDGPU::SGPR_32RegClassID; 2323 case 64: 2324 return AMDGPU::SGPR_64RegClassID; 2325 case 96: 2326 return AMDGPU::SGPR_96RegClassID; 2327 case 128: 2328 return AMDGPU::SGPR_128RegClassID; 2329 case 160: 2330 return AMDGPU::SGPR_160RegClassID; 2331 case 192: 2332 return AMDGPU::SGPR_192RegClassID; 2333 case 224: 2334 return AMDGPU::SGPR_224RegClassID; 2335 case 256: 2336 return AMDGPU::SGPR_256RegClassID; 2337 case 512: 2338 return AMDGPU::SGPR_512RegClassID; 2339 } 2340 } else if (Is == IS_AGPR) { 2341 switch (RegWidth) { 2342 default: return -1; 2343 case 32: 2344 return AMDGPU::AGPR_32RegClassID; 2345 case 64: 2346 return AMDGPU::AReg_64RegClassID; 2347 case 96: 2348 return AMDGPU::AReg_96RegClassID; 2349 case 128: 2350 return AMDGPU::AReg_128RegClassID; 2351 case 160: 2352 return AMDGPU::AReg_160RegClassID; 2353 case 192: 2354 return AMDGPU::AReg_192RegClassID; 2355 case 224: 2356 return AMDGPU::AReg_224RegClassID; 2357 case 256: 2358 return AMDGPU::AReg_256RegClassID; 2359 case 512: 2360 return AMDGPU::AReg_512RegClassID; 
2361 case 1024: 2362 return AMDGPU::AReg_1024RegClassID; 2363 } 2364 } 2365 return -1; 2366 } 2367 2368 static unsigned getSpecialRegForName(StringRef RegName) { 2369 return StringSwitch<unsigned>(RegName) 2370 .Case("exec", AMDGPU::EXEC) 2371 .Case("vcc", AMDGPU::VCC) 2372 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2373 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2374 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2375 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2376 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2377 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2378 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2379 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2380 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2381 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2382 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2383 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2384 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2385 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2386 .Case("m0", AMDGPU::M0) 2387 .Case("vccz", AMDGPU::SRC_VCCZ) 2388 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2389 .Case("execz", AMDGPU::SRC_EXECZ) 2390 .Case("src_execz", AMDGPU::SRC_EXECZ) 2391 .Case("scc", AMDGPU::SRC_SCC) 2392 .Case("src_scc", AMDGPU::SRC_SCC) 2393 .Case("tba", AMDGPU::TBA) 2394 .Case("tma", AMDGPU::TMA) 2395 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2396 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2397 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2398 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2399 .Case("vcc_lo", AMDGPU::VCC_LO) 2400 .Case("vcc_hi", AMDGPU::VCC_HI) 2401 .Case("exec_lo", AMDGPU::EXEC_LO) 2402 .Case("exec_hi", AMDGPU::EXEC_HI) 2403 .Case("tma_lo", AMDGPU::TMA_LO) 2404 .Case("tma_hi", AMDGPU::TMA_HI) 2405 .Case("tba_lo", AMDGPU::TBA_LO) 2406 .Case("tba_hi", AMDGPU::TBA_HI) 2407 .Case("pc", AMDGPU::PC_REG) 2408 .Case("null", AMDGPU::SGPR_NULL) 2409 .Default(AMDGPU::NoRegister); 2410 } 2411 2412 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2413 SMLoc &EndLoc, bool RestoreOnFailure) { 2414 auto R = parseRegister(); 2415 if (!R) return true; 2416 assert(R->isReg()); 2417 RegNo = R->getReg(); 2418 StartLoc = R->getStartLoc(); 2419 EndLoc = R->getEndLoc(); 2420 return false; 2421 } 2422 2423 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2424 SMLoc &EndLoc) { 2425 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2426 } 2427 2428 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2429 SMLoc &StartLoc, 2430 SMLoc &EndLoc) { 2431 bool Result = 2432 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2433 bool PendingErrors = getParser().hasPendingError(); 2434 getParser().clearPendingErrors(); 2435 if (PendingErrors) 2436 return MatchOperand_ParseFail; 2437 if (Result) 2438 return MatchOperand_NoMatch; 2439 return MatchOperand_Success; 2440 } 2441 2442 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2443 RegisterKind RegKind, unsigned Reg1, 2444 SMLoc Loc) { 2445 switch (RegKind) { 2446 case IS_SPECIAL: 2447 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2448 Reg = AMDGPU::EXEC; 2449 RegWidth = 64; 2450 return true; 2451 } 2452 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2453 Reg = AMDGPU::FLAT_SCR; 2454 RegWidth = 64; 2455 return true; 2456 } 2457 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2458 Reg = AMDGPU::XNACK_MASK; 2459 RegWidth = 64; 
2460 return true; 2461 } 2462 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2463 Reg = AMDGPU::VCC; 2464 RegWidth = 64; 2465 return true; 2466 } 2467 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2468 Reg = AMDGPU::TBA; 2469 RegWidth = 64; 2470 return true; 2471 } 2472 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2473 Reg = AMDGPU::TMA; 2474 RegWidth = 64; 2475 return true; 2476 } 2477 Error(Loc, "register does not fit in the list"); 2478 return false; 2479 case IS_VGPR: 2480 case IS_SGPR: 2481 case IS_AGPR: 2482 case IS_TTMP: 2483 if (Reg1 != Reg + RegWidth / 32) { 2484 Error(Loc, "registers in a list must have consecutive indices"); 2485 return false; 2486 } 2487 RegWidth += 32; 2488 return true; 2489 default: 2490 llvm_unreachable("unexpected register kind"); 2491 } 2492 } 2493 2494 struct RegInfo { 2495 StringLiteral Name; 2496 RegisterKind Kind; 2497 }; 2498 2499 static constexpr RegInfo RegularRegisters[] = { 2500 {{"v"}, IS_VGPR}, 2501 {{"s"}, IS_SGPR}, 2502 {{"ttmp"}, IS_TTMP}, 2503 {{"acc"}, IS_AGPR}, 2504 {{"a"}, IS_AGPR}, 2505 }; 2506 2507 static bool isRegularReg(RegisterKind Kind) { 2508 return Kind == IS_VGPR || 2509 Kind == IS_SGPR || 2510 Kind == IS_TTMP || 2511 Kind == IS_AGPR; 2512 } 2513 2514 static const RegInfo* getRegularRegInfo(StringRef Str) { 2515 for (const RegInfo &Reg : RegularRegisters) 2516 if (Str.startswith(Reg.Name)) 2517 return &Reg; 2518 return nullptr; 2519 } 2520 2521 static bool getRegNum(StringRef Str, unsigned& Num) { 2522 return !Str.getAsInteger(10, Num); 2523 } 2524 2525 bool 2526 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2527 const AsmToken &NextToken) const { 2528 2529 // A list of consecutive registers: [s0,s1,s2,s3] 2530 if (Token.is(AsmToken::LBrac)) 2531 return true; 2532 2533 if (!Token.is(AsmToken::Identifier)) 2534 return false; 2535 2536 // A single register like s0 or a range of registers like s[0:1] 2537 2538 StringRef Str = Token.getString(); 2539 const RegInfo *Reg = getRegularRegInfo(Str); 2540 if (Reg) { 2541 StringRef RegName = Reg->Name; 2542 StringRef RegSuffix = Str.substr(RegName.size()); 2543 if (!RegSuffix.empty()) { 2544 unsigned Num; 2545 // A single register with an index: rXX 2546 if (getRegNum(RegSuffix, Num)) 2547 return true; 2548 } else { 2549 // A range of registers: r[XX:YY]. 2550 if (NextToken.is(AsmToken::LBrac)) 2551 return true; 2552 } 2553 } 2554 2555 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2556 } 2557 2558 bool 2559 AMDGPUAsmParser::isRegister() 2560 { 2561 return isRegister(getToken(), peekToken()); 2562 } 2563 2564 unsigned 2565 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2566 unsigned RegNum, 2567 unsigned RegWidth, 2568 SMLoc Loc) { 2569 2570 assert(isRegularReg(RegKind)); 2571 2572 unsigned AlignSize = 1; 2573 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2574 // SGPR and TTMP registers must be aligned. 2575 // Max required alignment is 4 dwords. 
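// For example, s[2:3] is an acceptable 64-bit pair while s[1:2] should be
// rejected as misaligned; wider tuples such as s[4:11] only require the
// starting index to be a multiple of 4.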
2576 AlignSize = std::min(RegWidth / 32, 4u); 2577 } 2578 2579 if (RegNum % AlignSize != 0) { 2580 Error(Loc, "invalid register alignment"); 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 unsigned RegIdx = RegNum / AlignSize; 2585 int RCID = getRegClass(RegKind, RegWidth); 2586 if (RCID == -1) { 2587 Error(Loc, "invalid or unsupported register size"); 2588 return AMDGPU::NoRegister; 2589 } 2590 2591 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2592 const MCRegisterClass RC = TRI->getRegClass(RCID); 2593 if (RegIdx >= RC.getNumRegs()) { 2594 Error(Loc, "register index is out of range"); 2595 return AMDGPU::NoRegister; 2596 } 2597 2598 return RC.getRegister(RegIdx); 2599 } 2600 2601 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2602 int64_t RegLo, RegHi; 2603 if (!skipToken(AsmToken::LBrac, "missing register index")) 2604 return false; 2605 2606 SMLoc FirstIdxLoc = getLoc(); 2607 SMLoc SecondIdxLoc; 2608 2609 if (!parseExpr(RegLo)) 2610 return false; 2611 2612 if (trySkipToken(AsmToken::Colon)) { 2613 SecondIdxLoc = getLoc(); 2614 if (!parseExpr(RegHi)) 2615 return false; 2616 } else { 2617 RegHi = RegLo; 2618 } 2619 2620 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2621 return false; 2622 2623 if (!isUInt<32>(RegLo)) { 2624 Error(FirstIdxLoc, "invalid register index"); 2625 return false; 2626 } 2627 2628 if (!isUInt<32>(RegHi)) { 2629 Error(SecondIdxLoc, "invalid register index"); 2630 return false; 2631 } 2632 2633 if (RegLo > RegHi) { 2634 Error(FirstIdxLoc, "first register index should not exceed second index"); 2635 return false; 2636 } 2637 2638 Num = static_cast<unsigned>(RegLo); 2639 RegWidth = 32 * ((RegHi - RegLo) + 1); 2640 return true; 2641 } 2642 2643 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2644 unsigned &RegNum, unsigned &RegWidth, 2645 SmallVectorImpl<AsmToken> &Tokens) { 2646 assert(isToken(AsmToken::Identifier)); 2647 unsigned Reg = getSpecialRegForName(getTokenStr()); 2648 if (Reg) { 2649 RegNum = 0; 2650 RegWidth = 32; 2651 RegKind = IS_SPECIAL; 2652 Tokens.push_back(getToken()); 2653 lex(); // skip register name 2654 } 2655 return Reg; 2656 } 2657 2658 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2659 unsigned &RegNum, unsigned &RegWidth, 2660 SmallVectorImpl<AsmToken> &Tokens) { 2661 assert(isToken(AsmToken::Identifier)); 2662 StringRef RegName = getTokenStr(); 2663 auto Loc = getLoc(); 2664 2665 const RegInfo *RI = getRegularRegInfo(RegName); 2666 if (!RI) { 2667 Error(Loc, "invalid register name"); 2668 return AMDGPU::NoRegister; 2669 } 2670 2671 Tokens.push_back(getToken()); 2672 lex(); // skip register name 2673 2674 RegKind = RI->Kind; 2675 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2676 if (!RegSuffix.empty()) { 2677 // Single 32-bit register: vXX. 2678 if (!getRegNum(RegSuffix, RegNum)) { 2679 Error(Loc, "invalid register index"); 2680 return AMDGPU::NoRegister; 2681 } 2682 RegWidth = 32; 2683 } else { 2684 // Range of registers: v[XX:YY]. ":YY" is optional. 
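// For example, "v[5]" should parse as a single 32-bit VGPR and "v[4:7]"
// as a 128-bit tuple (RegWidth = 32 * number of registers in the range).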
2685 if (!ParseRegRange(RegNum, RegWidth)) 2686 return AMDGPU::NoRegister; 2687 } 2688 2689 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2690 } 2691 2692 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2693 unsigned &RegWidth, 2694 SmallVectorImpl<AsmToken> &Tokens) { 2695 unsigned Reg = AMDGPU::NoRegister; 2696 auto ListLoc = getLoc(); 2697 2698 if (!skipToken(AsmToken::LBrac, 2699 "expected a register or a list of registers")) { 2700 return AMDGPU::NoRegister; 2701 } 2702 2703 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2704 2705 auto Loc = getLoc(); 2706 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2707 return AMDGPU::NoRegister; 2708 if (RegWidth != 32) { 2709 Error(Loc, "expected a single 32-bit register"); 2710 return AMDGPU::NoRegister; 2711 } 2712 2713 for (; trySkipToken(AsmToken::Comma); ) { 2714 RegisterKind NextRegKind; 2715 unsigned NextReg, NextRegNum, NextRegWidth; 2716 Loc = getLoc(); 2717 2718 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2719 NextRegNum, NextRegWidth, 2720 Tokens)) { 2721 return AMDGPU::NoRegister; 2722 } 2723 if (NextRegWidth != 32) { 2724 Error(Loc, "expected a single 32-bit register"); 2725 return AMDGPU::NoRegister; 2726 } 2727 if (NextRegKind != RegKind) { 2728 Error(Loc, "registers in a list must be of the same kind"); 2729 return AMDGPU::NoRegister; 2730 } 2731 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2732 return AMDGPU::NoRegister; 2733 } 2734 2735 if (!skipToken(AsmToken::RBrac, 2736 "expected a comma or a closing square bracket")) { 2737 return AMDGPU::NoRegister; 2738 } 2739 2740 if (isRegularReg(RegKind)) 2741 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2742 2743 return Reg; 2744 } 2745 2746 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2747 unsigned &RegNum, unsigned &RegWidth, 2748 SmallVectorImpl<AsmToken> &Tokens) { 2749 auto Loc = getLoc(); 2750 Reg = AMDGPU::NoRegister; 2751 2752 if (isToken(AsmToken::Identifier)) { 2753 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2754 if (Reg == AMDGPU::NoRegister) 2755 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2756 } else { 2757 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2758 } 2759 2760 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2761 if (Reg == AMDGPU::NoRegister) { 2762 assert(Parser.hasPendingError()); 2763 return false; 2764 } 2765 2766 if (!subtargetHasRegister(*TRI, Reg)) { 2767 if (Reg == AMDGPU::SGPR_NULL) { 2768 Error(Loc, "'null' operand is not supported on this GPU"); 2769 } else { 2770 Error(Loc, "register not available on this GPU"); 2771 } 2772 return false; 2773 } 2774 2775 return true; 2776 } 2777 2778 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2779 unsigned &RegNum, unsigned &RegWidth, 2780 bool RestoreOnFailure /*=false*/) { 2781 Reg = AMDGPU::NoRegister; 2782 2783 SmallVector<AsmToken, 1> Tokens; 2784 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2785 if (RestoreOnFailure) { 2786 while (!Tokens.empty()) { 2787 getLexer().UnLex(Tokens.pop_back_val()); 2788 } 2789 } 2790 return true; 2791 } 2792 return false; 2793 } 2794 2795 Optional<StringRef> 2796 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2797 switch (RegKind) { 2798 case IS_VGPR: 2799 return StringRef(".amdgcn.next_free_vgpr"); 2800 case IS_SGPR: 2801 return StringRef(".amdgcn.next_free_sgpr"); 2802 default: 2803 return None; 2804 } 2805 } 2806 2807 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2808 auto SymbolName = getGprCountSymbolName(RegKind); 2809 assert(SymbolName && "initializing invalid register kind"); 2810 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2811 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2812 } 2813 2814 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2815 unsigned DwordRegIndex, 2816 unsigned RegWidth) { 2817 // Symbols are only defined for GCN targets 2818 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2819 return true; 2820 2821 auto SymbolName = getGprCountSymbolName(RegKind); 2822 if (!SymbolName) 2823 return true; 2824 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2825 2826 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2827 int64_t OldCount; 2828 2829 if (!Sym->isVariable()) 2830 return !Error(getLoc(), 2831 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2832 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2833 return !Error( 2834 getLoc(), 2835 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2836 2837 if (OldCount <= NewMax) 2838 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2839 2840 return true; 2841 } 2842 2843 std::unique_ptr<AMDGPUOperand> 2844 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2845 const auto &Tok = getToken(); 2846 SMLoc StartLoc = Tok.getLoc(); 2847 SMLoc EndLoc = Tok.getEndLoc(); 2848 RegisterKind RegKind; 2849 unsigned Reg, RegNum, RegWidth; 2850 2851 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2852 return nullptr; 2853 } 2854 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2855 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2856 return nullptr; 2857 } else 2858 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2859 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2860 } 2861 2862 OperandMatchResultTy 2863 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2864 // TODO: add syntactic sugar for 1/(2*PI) 2865 2866 assert(!isRegister()); 2867 assert(!isModifier()); 2868 2869 const auto& Tok = getToken(); 2870 const auto& NextTok = peekToken(); 2871 bool IsReal = Tok.is(AsmToken::Real); 2872 SMLoc S = getLoc(); 2873 bool Negate = false; 2874 2875 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2876 lex(); 2877 IsReal = true; 2878 Negate = true; 2879 } 2880 2881 if (IsReal) { 2882 // Floating-point expressions are not supported. 2883 // Can only allow floating-point literals with an 2884 // optional sign. 2885 2886 StringRef Num = getTokenStr(); 2887 lex(); 2888 2889 APFloat RealVal(APFloat::IEEEdouble()); 2890 auto roundMode = APFloat::rmNearestTiesToEven; 2891 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2892 return MatchOperand_ParseFail; 2893 } 2894 if (Negate) 2895 RealVal.changeSign(); 2896 2897 Operands.push_back( 2898 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2899 AMDGPUOperand::ImmTyNone, true)); 2900 2901 return MatchOperand_Success; 2902 2903 } else { 2904 int64_t IntVal; 2905 const MCExpr *Expr; 2906 SMLoc S = getLoc(); 2907 2908 if (HasSP3AbsModifier) { 2909 // This is a workaround for handling expressions 2910 // as arguments of SP3 'abs' modifier, for example: 2911 // |1.0| 2912 // |-1| 2913 // |1+x| 2914 // This syntax is not compatible with syntax of standard 2915 // MC expressions (due to the trailing '|'). 
2916 SMLoc EndLoc; 2917 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2918 return MatchOperand_ParseFail; 2919 } else { 2920 if (Parser.parseExpression(Expr)) 2921 return MatchOperand_ParseFail; 2922 } 2923 2924 if (Expr->evaluateAsAbsolute(IntVal)) { 2925 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2926 } else { 2927 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2928 } 2929 2930 return MatchOperand_Success; 2931 } 2932 2933 return MatchOperand_NoMatch; 2934 } 2935 2936 OperandMatchResultTy 2937 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2938 if (!isRegister()) 2939 return MatchOperand_NoMatch; 2940 2941 if (auto R = parseRegister()) { 2942 assert(R->isReg()); 2943 Operands.push_back(std::move(R)); 2944 return MatchOperand_Success; 2945 } 2946 return MatchOperand_ParseFail; 2947 } 2948 2949 OperandMatchResultTy 2950 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2951 auto res = parseReg(Operands); 2952 if (res != MatchOperand_NoMatch) { 2953 return res; 2954 } else if (isModifier()) { 2955 return MatchOperand_NoMatch; 2956 } else { 2957 return parseImm(Operands, HasSP3AbsMod); 2958 } 2959 } 2960 2961 bool 2962 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2963 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2964 const auto &str = Token.getString(); 2965 return str == "abs" || str == "neg" || str == "sext"; 2966 } 2967 return false; 2968 } 2969 2970 bool 2971 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2972 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2973 } 2974 2975 bool 2976 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2977 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2978 } 2979 2980 bool 2981 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2982 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2983 } 2984 2985 // Check if this is an operand modifier or an opcode modifier 2986 // which may look like an expression but it is not. We should 2987 // avoid parsing these modifiers as expressions. Currently 2988 // recognized sequences are: 2989 // |...| 2990 // abs(...) 2991 // neg(...) 2992 // sext(...) 2993 // -reg 2994 // -|...| 2995 // -abs(...) 2996 // name:... 2997 // Note that simple opcode modifiers like 'gds' may be parsed as 2998 // expressions; this is a special case. See getExpressionAsToken. 2999 // 3000 bool 3001 AMDGPUAsmParser::isModifier() { 3002 3003 AsmToken Tok = getToken(); 3004 AsmToken NextToken[2]; 3005 peekTokens(NextToken); 3006 3007 return isOperandModifier(Tok, NextToken[0]) || 3008 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3009 isOpcodeModifierWithVal(Tok, NextToken[0]); 3010 } 3011 3012 // Check if the current token is an SP3 'neg' modifier. 3013 // Currently this modifier is allowed in the following context: 3014 // 3015 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3016 // 2. Before an 'abs' modifier: -abs(...) 3017 // 3. Before an SP3 'abs' modifier: -|...| 3018 // 3019 // In all other cases "-" is handled as a part 3020 // of an expression that follows the sign. 
3021 // 3022 // Note: When "-" is followed by an integer literal, 3023 // this is interpreted as integer negation rather 3024 // than a floating-point NEG modifier applied to N. 3025 // Besides being counter-intuitive, such use of the floating-point 3026 // NEG modifier would have resulted in different meanings 3027 // of integer literals used with VOP1/2/C and VOP3, 3028 // for example: 3029 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3030 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3031 // Negative fp literals with preceding "-" are 3032 // handled likewise for uniformity. 3033 // 3034 bool 3035 AMDGPUAsmParser::parseSP3NegModifier() { 3036 3037 AsmToken NextToken[2]; 3038 peekTokens(NextToken); 3039 3040 if (isToken(AsmToken::Minus) && 3041 (isRegister(NextToken[0], NextToken[1]) || 3042 NextToken[0].is(AsmToken::Pipe) || 3043 isId(NextToken[0], "abs"))) { 3044 lex(); 3045 return true; 3046 } 3047 3048 return false; 3049 } 3050 3051 OperandMatchResultTy 3052 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3053 bool AllowImm) { 3054 bool Neg, SP3Neg; 3055 bool Abs, SP3Abs; 3056 SMLoc Loc; 3057 3058 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3059 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3060 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3061 return MatchOperand_ParseFail; 3062 } 3063 3064 SP3Neg = parseSP3NegModifier(); 3065 3066 Loc = getLoc(); 3067 Neg = trySkipId("neg"); 3068 if (Neg && SP3Neg) { 3069 Error(Loc, "expected register or immediate"); 3070 return MatchOperand_ParseFail; 3071 } 3072 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3073 return MatchOperand_ParseFail; 3074 3075 Abs = trySkipId("abs"); 3076 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3077 return MatchOperand_ParseFail; 3078 3079 Loc = getLoc(); 3080 SP3Abs = trySkipToken(AsmToken::Pipe); 3081 if (Abs && SP3Abs) { 3082 Error(Loc, "expected register or immediate"); 3083 return MatchOperand_ParseFail; 3084 } 3085 3086 OperandMatchResultTy Res; 3087 if (AllowImm) { 3088 Res = parseRegOrImm(Operands, SP3Abs); 3089 } else { 3090 Res = parseReg(Operands); 3091 } 3092 if (Res != MatchOperand_Success) { 3093 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3094 } 3095 3096 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3097 return MatchOperand_ParseFail; 3098 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3099 return MatchOperand_ParseFail; 3100 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3101 return MatchOperand_ParseFail; 3102 3103 AMDGPUOperand::Modifiers Mods; 3104 Mods.Abs = Abs || SP3Abs; 3105 Mods.Neg = Neg || SP3Neg; 3106 3107 if (Mods.hasFPModifiers()) { 3108 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3109 if (Op.isExpr()) { 3110 Error(Op.getStartLoc(), "expected an absolute expression"); 3111 return MatchOperand_ParseFail; 3112 } 3113 Op.setModifiers(Mods); 3114 } 3115 return MatchOperand_Success; 3116 } 3117 3118 OperandMatchResultTy 3119 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3120 bool AllowImm) { 3121 bool Sext = trySkipId("sext"); 3122 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3123 return MatchOperand_ParseFail; 3124 3125 OperandMatchResultTy Res; 3126 if (AllowImm) { 3127 Res = parseRegOrImm(Operands); 3128 } else { 3129 Res = parseReg(Operands); 3130 } 3131 if (Res != MatchOperand_Success) { 3132 return Sext? MatchOperand_ParseFail : Res; 3133 } 3134 3135 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3136 return MatchOperand_ParseFail; 3137 3138 AMDGPUOperand::Modifiers Mods; 3139 Mods.Sext = Sext; 3140 3141 if (Mods.hasIntModifiers()) { 3142 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3143 if (Op.isExpr()) { 3144 Error(Op.getStartLoc(), "expected an absolute expression"); 3145 return MatchOperand_ParseFail; 3146 } 3147 Op.setModifiers(Mods); 3148 } 3149 3150 return MatchOperand_Success; 3151 } 3152 3153 OperandMatchResultTy 3154 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3155 return parseRegOrImmWithFPInputMods(Operands, false); 3156 } 3157 3158 OperandMatchResultTy 3159 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3160 return parseRegOrImmWithIntInputMods(Operands, false); 3161 } 3162 3163 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3164 auto Loc = getLoc(); 3165 if (trySkipId("off")) { 3166 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3167 AMDGPUOperand::ImmTyOff, false)); 3168 return MatchOperand_Success; 3169 } 3170 3171 if (!isRegister()) 3172 return MatchOperand_NoMatch; 3173 3174 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3175 if (Reg) { 3176 Operands.push_back(std::move(Reg)); 3177 return MatchOperand_Success; 3178 } 3179 3180 return MatchOperand_ParseFail; 3181 3182 } 3183 3184 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3185 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3186 3187 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3188 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3189 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3190 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3191 return Match_InvalidOperand; 3192 3193 if ((TSFlags & SIInstrFlags::VOP3) && 3194 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3195 getForcedEncodingSize() != 64) 3196 return Match_PreferE32; 3197 3198 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3199 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3200 // v_mac_f32/16 allow only dst_sel == DWORD; 3201 auto OpNum = 3202 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3203 const auto &Op = Inst.getOperand(OpNum); 3204 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3205 return Match_InvalidOperand; 3206 } 3207 } 3208 3209 return Match_Success; 3210 } 3211 3212 static ArrayRef<unsigned> getAllVariants() { 3213 static const unsigned Variants[] = { 3214 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3215 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3216 }; 3217 3218 return makeArrayRef(Variants); 3219 } 3220 3221 // Which asm variants we should check 3222 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3223 if (getForcedEncodingSize() == 32) { 3224 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3225 return makeArrayRef(Variants); 3226 } 3227 3228 if (isForcedVOP3()) { 3229 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3230 return makeArrayRef(Variants); 3231 } 3232 3233 if (isForcedSDWA()) { 3234 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3235 AMDGPUAsmVariants::SDWA9}; 3236 return makeArrayRef(Variants); 3237 } 3238 3239 if (isForcedDPP()) { 3240 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3241 return makeArrayRef(Variants); 3242 } 3243 3244 return getAllVariants(); 3245 } 3246 3247 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3248 if (getForcedEncodingSize() == 32) 3249 return "e32"; 3250 3251 if (isForcedVOP3()) 3252 return "e64"; 3253 3254 if (isForcedSDWA()) 3255 return "sdwa"; 3256 3257 if (isForcedDPP()) 3258 return "dpp"; 3259 3260 return ""; 3261 } 3262 3263 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3264 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3265 const unsigned Num = Desc.getNumImplicitUses(); 3266 for (unsigned i = 0; i < Num; ++i) { 3267 unsigned Reg = Desc.ImplicitUses[i]; 3268 switch (Reg) { 3269 case AMDGPU::FLAT_SCR: 3270 case AMDGPU::VCC: 3271 case AMDGPU::VCC_LO: 3272 case AMDGPU::VCC_HI: 3273 case AMDGPU::M0: 3274 return Reg; 3275 default: 3276 break; 3277 } 3278 } 3279 return AMDGPU::NoRegister; 3280 } 3281 3282 // NB: This code is correct only when used to check constant 3283 // bus limitations because GFX7 supports no f16 inline constants. 3284 // Note that there are no cases where a GFX7 opcode violates 3285 // constant bus limitations due to the use of an f16 constant.
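// For example, an f16 source operand holding 0.5 is reported as inlinable
// here and therefore does not count against the constant bus, which is all
// that matters for the check below, even though GFX7 itself would not
// encode 0.5 as an f16 inline constant.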
3286 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3287 unsigned OpIdx) const { 3288 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3289 3290 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3291 return false; 3292 } 3293 3294 const MCOperand &MO = Inst.getOperand(OpIdx); 3295 3296 int64_t Val = MO.getImm(); 3297 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3298 3299 switch (OpSize) { // expected operand size 3300 case 8: 3301 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3302 case 4: 3303 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3304 case 2: { 3305 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3306 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3307 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3308 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3309 return AMDGPU::isInlinableIntLiteral(Val); 3310 3311 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3312 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3313 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3314 return AMDGPU::isInlinableIntLiteralV216(Val); 3315 3316 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3317 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3318 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3319 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3320 3321 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3322 } 3323 default: 3324 llvm_unreachable("invalid operand size"); 3325 } 3326 } 3327 3328 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3329 if (!isGFX10Plus()) 3330 return 1; 3331 3332 switch (Opcode) { 3333 // 64-bit shift instructions can use only one scalar value input 3334 case AMDGPU::V_LSHLREV_B64_e64: 3335 case AMDGPU::V_LSHLREV_B64_gfx10: 3336 case AMDGPU::V_LSHRREV_B64_e64: 3337 case AMDGPU::V_LSHRREV_B64_gfx10: 3338 case AMDGPU::V_ASHRREV_I64_e64: 3339 case AMDGPU::V_ASHRREV_I64_gfx10: 3340 case AMDGPU::V_LSHL_B64_e64: 3341 case AMDGPU::V_LSHR_B64_e64: 3342 case AMDGPU::V_ASHR_I64_e64: 3343 return 1; 3344 default: 3345 return 2; 3346 } 3347 } 3348 3349 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3350 const MCOperand &MO = Inst.getOperand(OpIdx); 3351 if (MO.isImm()) { 3352 return !isInlineConstant(Inst, OpIdx); 3353 } else if (MO.isReg()) { 3354 auto Reg = MO.getReg(); 3355 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3356 auto PReg = mc2PseudoReg(Reg); 3357 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3358 } else { 3359 return true; 3360 } 3361 } 3362 3363 bool 3364 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3365 const OperandVector &Operands) { 3366 const unsigned Opcode = Inst.getOpcode(); 3367 const MCInstrDesc &Desc = MII.get(Opcode); 3368 unsigned LastSGPR = AMDGPU::NoRegister; 3369 unsigned ConstantBusUseCount = 0; 3370 unsigned NumLiterals = 0; 3371 unsigned LiteralSize; 3372 3373 if (Desc.TSFlags & 3374 (SIInstrFlags::VOPC | 3375 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3376 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3377 SIInstrFlags::SDWA)) { 3378 // Check special imm operands (used by madmk, etc) 3379 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3380 ++NumLiterals; 3381 LiteralSize = 4; 3382 } 3383 3384 SmallDenseSet<unsigned> SGPRsUsed; 3385 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3386 if (SGPRUsed != AMDGPU::NoRegister) { 3387 SGPRsUsed.insert(SGPRUsed); 3388 ++ConstantBusUseCount; 3389 } 3390 3391 
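// For example, "v_add_f32_e64 v0, s1, s2" reads two different SGPRs and is
// expected to exceed the pre-GFX10 limit of one scalar value per
// instruction, while GFX10+ accepts it because getConstantBusLimit()
// allows two scalar values there.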
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3392 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3393 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3394 3395 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3396 3397 for (int OpIdx : OpIndices) { 3398 if (OpIdx == -1) break; 3399 3400 const MCOperand &MO = Inst.getOperand(OpIdx); 3401 if (usesConstantBus(Inst, OpIdx)) { 3402 if (MO.isReg()) { 3403 LastSGPR = mc2PseudoReg(MO.getReg()); 3404 // Pairs of registers with partial intersections such as 3405 // s0, s[0:1] 3406 // flat_scratch_lo, flat_scratch 3407 // flat_scratch_lo, flat_scratch_hi 3408 // are theoretically valid but are disabled anyway. 3409 // Note that this code mimics SIInstrInfo::verifyInstruction 3410 if (!SGPRsUsed.count(LastSGPR)) { 3411 SGPRsUsed.insert(LastSGPR); 3412 ++ConstantBusUseCount; 3413 } 3414 } else { // Expression or a literal 3415 3416 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3417 continue; // special operand like VINTERP attr_chan 3418 3419 // An instruction may use only one literal. 3420 // This has been validated in a previous step. 3421 // See validateVOPLiteral. 3422 // This literal may be used as more than one operand. 3423 // If all these operands are of the same size, 3424 // this literal counts as one scalar value. 3425 // Otherwise it counts as 2 scalar values. 3426 // See "GFX10 Shader Programming", section 3.6.2.3. 3427 3428 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3429 if (Size < 4) Size = 4; 3430 3431 if (NumLiterals == 0) { 3432 NumLiterals = 1; 3433 LiteralSize = Size; 3434 } else if (LiteralSize != Size) { 3435 NumLiterals = 2; 3436 } 3437 } 3438 } 3439 } 3440 } 3441 ConstantBusUseCount += NumLiterals; 3442 3443 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3444 return true; 3445 3446 SMLoc LitLoc = getLitLoc(Operands); 3447 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3448 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3449 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3450 return false; 3451 } 3452 3453 bool 3454 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3455 const OperandVector &Operands) { 3456 const unsigned Opcode = Inst.getOpcode(); 3457 const MCInstrDesc &Desc = MII.get(Opcode); 3458 3459 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3460 if (DstIdx == -1 || 3461 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3462 return true; 3463 } 3464 3465 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3466 3467 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3468 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3469 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3470 3471 assert(DstIdx != -1); 3472 const MCOperand &Dst = Inst.getOperand(DstIdx); 3473 assert(Dst.isReg()); 3474 3475 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3476 3477 for (int SrcIdx : SrcIndices) { 3478 if (SrcIdx == -1) break; 3479 const MCOperand &Src = Inst.getOperand(SrcIdx); 3480 if (Src.isReg()) { 3481 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3482 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3483 Error(getRegLoc(SrcReg, Operands), 3484 "destination must be different than all sources"); 3485 return false; 3486 } 3487 } 3488 } 3489 3490 return true; 3491 } 3492 3493 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3494 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3499 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3500 assert(ClampIdx != -1); 3501 return Inst.getOperand(ClampIdx).getImm() == 0; 3502 } 3503 3504 return true; 3505 } 3506 3507 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3508 3509 const unsigned Opc = Inst.getOpcode(); 3510 const MCInstrDesc &Desc = MII.get(Opc); 3511 3512 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3513 return None; 3514 3515 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3516 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3517 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3518 3519 assert(VDataIdx != -1); 3520 3521 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3522 return None; 3523 3524 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3525 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3526 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3527 if (DMask == 0) 3528 DMask = 1; 3529 3530 bool isPackedD16 = false; 3531 unsigned DataSize = 3532 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3533 if (hasPackedD16()) { 3534 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3535 isPackedD16 = D16Idx >= 0; 3536 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3537 DataSize = (DataSize + 1) / 2; 3538 } 3539 3540 if ((VDataSize / 4) == DataSize + TFESize) 3541 return None; 3542 3543 return StringRef(isPackedD16 3544 ? 
"image data size does not match dmask, d16 and tfe" 3545 : "image data size does not match dmask and tfe"); 3546 } 3547 3548 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3549 const unsigned Opc = Inst.getOpcode(); 3550 const MCInstrDesc &Desc = MII.get(Opc); 3551 3552 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3553 return true; 3554 3555 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3556 3557 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3558 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3559 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3560 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3561 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3562 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3563 3564 assert(VAddr0Idx != -1); 3565 assert(SrsrcIdx != -1); 3566 assert(SrsrcIdx > VAddr0Idx); 3567 3568 if (DimIdx == -1) 3569 return true; // intersect_ray 3570 3571 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3572 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3573 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3574 unsigned ActualAddrSize = 3575 IsNSA ? SrsrcIdx - VAddr0Idx 3576 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3577 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3578 3579 unsigned ExpectedAddrSize = 3580 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3581 3582 if (!IsNSA) { 3583 if (ExpectedAddrSize > 8) 3584 ExpectedAddrSize = 16; 3585 3586 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3587 // This provides backward compatibility for assembly created 3588 // before 160b/192b/224b types were directly supported. 3589 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3590 return true; 3591 } 3592 3593 return ActualAddrSize == ExpectedAddrSize; 3594 } 3595 3596 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3597 3598 const unsigned Opc = Inst.getOpcode(); 3599 const MCInstrDesc &Desc = MII.get(Opc); 3600 3601 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3602 return true; 3603 if (!Desc.mayLoad() || !Desc.mayStore()) 3604 return true; // Not atomic 3605 3606 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3607 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3608 3609 // This is an incomplete check because image_atomic_cmpswap 3610 // may only use 0x3 and 0xf while other atomic operations 3611 // may use 0x1 and 0x3. However these limitations are 3612 // verified when we check that dmask matches dst size. 3613 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3614 } 3615 3616 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3617 3618 const unsigned Opc = Inst.getOpcode(); 3619 const MCInstrDesc &Desc = MII.get(Opc); 3620 3621 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3622 return true; 3623 3624 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3625 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3626 3627 // GATHER4 instructions use dmask in a different fashion compared to 3628 // other MIMG instructions. The only useful DMASK values are 3629 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3630 // (red,red,red,red) etc.) The ISA document doesn't mention 3631 // this. 
3632 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3633 } 3634 3635 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3636 const unsigned Opc = Inst.getOpcode(); 3637 const MCInstrDesc &Desc = MII.get(Opc); 3638 3639 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3640 return true; 3641 3642 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3643 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3644 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3645 3646 if (!BaseOpcode->MSAA) 3647 return true; 3648 3649 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3650 assert(DimIdx != -1); 3651 3652 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3653 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3654 3655 return DimInfo->MSAA; 3656 } 3657 3658 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3659 { 3660 switch (Opcode) { 3661 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3662 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3663 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3664 return true; 3665 default: 3666 return false; 3667 } 3668 } 3669 3670 // movrels* opcodes should only allow VGPRS as src0. 3671 // This is specified in .td description for vop1/vop3, 3672 // but sdwa is handled differently. See isSDWAOperand. 3673 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3674 const OperandVector &Operands) { 3675 3676 const unsigned Opc = Inst.getOpcode(); 3677 const MCInstrDesc &Desc = MII.get(Opc); 3678 3679 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3680 return true; 3681 3682 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3683 assert(Src0Idx != -1); 3684 3685 SMLoc ErrLoc; 3686 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3687 if (Src0.isReg()) { 3688 auto Reg = mc2PseudoReg(Src0.getReg()); 3689 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3690 if (!isSGPR(Reg, TRI)) 3691 return true; 3692 ErrLoc = getRegLoc(Reg, Operands); 3693 } else { 3694 ErrLoc = getConstLoc(Operands); 3695 } 3696 3697 Error(ErrLoc, "source operand must be a VGPR"); 3698 return false; 3699 } 3700 3701 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3702 const OperandVector &Operands) { 3703 3704 const unsigned Opc = Inst.getOpcode(); 3705 3706 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3707 return true; 3708 3709 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3710 assert(Src0Idx != -1); 3711 3712 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3713 if (!Src0.isReg()) 3714 return true; 3715 3716 auto Reg = mc2PseudoReg(Src0.getReg()); 3717 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3718 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3719 Error(getRegLoc(Reg, Operands), 3720 "source operand must be either a VGPR or an inline constant"); 3721 return false; 3722 } 3723 3724 return true; 3725 } 3726 3727 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3728 const OperandVector &Operands) { 3729 const unsigned Opc = Inst.getOpcode(); 3730 const MCInstrDesc &Desc = MII.get(Opc); 3731 3732 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3733 return true; 3734 3735 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3736 if (Src2Idx == -1) 3737 return true; 3738 3739 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3740 if (!Src2.isReg()) 3741 return true; 3742 3743 MCRegister Src2Reg = Src2.getReg(); 3744 MCRegister DstReg = Inst.getOperand(0).getReg(); 3745 if (Src2Reg == DstReg) 3746 return 
true; 3747 3748 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3749 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3750 return true; 3751 3752 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3753 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3754 "source 2 operand must not partially overlap with dst"); 3755 return false; 3756 } 3757 3758 return true; 3759 } 3760 3761 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3762 switch (Inst.getOpcode()) { 3763 default: 3764 return true; 3765 case V_DIV_SCALE_F32_gfx6_gfx7: 3766 case V_DIV_SCALE_F32_vi: 3767 case V_DIV_SCALE_F32_gfx10: 3768 case V_DIV_SCALE_F64_gfx6_gfx7: 3769 case V_DIV_SCALE_F64_vi: 3770 case V_DIV_SCALE_F64_gfx10: 3771 break; 3772 } 3773 3774 // TODO: Check that src0 = src1 or src2. 3775 3776 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3777 AMDGPU::OpName::src1_modifiers, 3778 AMDGPU::OpName::src2_modifiers}) { 3779 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3780 .getImm() & 3781 SISrcMods::ABS) { 3782 return false; 3783 } 3784 } 3785 3786 return true; 3787 } 3788 3789 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3790 3791 const unsigned Opc = Inst.getOpcode(); 3792 const MCInstrDesc &Desc = MII.get(Opc); 3793 3794 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3795 return true; 3796 3797 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3798 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3799 if (isCI() || isSI()) 3800 return false; 3801 } 3802 3803 return true; 3804 } 3805 3806 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3807 const unsigned Opc = Inst.getOpcode(); 3808 const MCInstrDesc &Desc = MII.get(Opc); 3809 3810 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3811 return true; 3812 3813 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3814 if (DimIdx < 0) 3815 return true; 3816 3817 long Imm = Inst.getOperand(DimIdx).getImm(); 3818 if (Imm < 0 || Imm >= 8) 3819 return false; 3820 3821 return true; 3822 } 3823 3824 static bool IsRevOpcode(const unsigned Opcode) 3825 { 3826 switch (Opcode) { 3827 case AMDGPU::V_SUBREV_F32_e32: 3828 case AMDGPU::V_SUBREV_F32_e64: 3829 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3830 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3831 case AMDGPU::V_SUBREV_F32_e32_vi: 3832 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3833 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3834 case AMDGPU::V_SUBREV_F32_e64_vi: 3835 3836 case AMDGPU::V_SUBREV_CO_U32_e32: 3837 case AMDGPU::V_SUBREV_CO_U32_e64: 3838 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3839 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3840 3841 case AMDGPU::V_SUBBREV_U32_e32: 3842 case AMDGPU::V_SUBBREV_U32_e64: 3843 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3844 case AMDGPU::V_SUBBREV_U32_e32_vi: 3845 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3846 case AMDGPU::V_SUBBREV_U32_e64_vi: 3847 3848 case AMDGPU::V_SUBREV_U32_e32: 3849 case AMDGPU::V_SUBREV_U32_e64: 3850 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3851 case AMDGPU::V_SUBREV_U32_e32_vi: 3852 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3853 case AMDGPU::V_SUBREV_U32_e64_vi: 3854 3855 case AMDGPU::V_SUBREV_F16_e32: 3856 case AMDGPU::V_SUBREV_F16_e64: 3857 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3858 case AMDGPU::V_SUBREV_F16_e32_vi: 3859 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3860 case AMDGPU::V_SUBREV_F16_e64_vi: 3861 3862 case AMDGPU::V_SUBREV_U16_e32: 3863 case AMDGPU::V_SUBREV_U16_e64: 3864 case AMDGPU::V_SUBREV_U16_e32_vi: 3865 case AMDGPU::V_SUBREV_U16_e64_vi: 3866 3867 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3868 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3869 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3870 3871 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3872 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3873 3874 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3875 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3876 3877 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3878 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3879 3880 case AMDGPU::V_LSHRREV_B32_e32: 3881 case AMDGPU::V_LSHRREV_B32_e64: 3882 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3883 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3884 case AMDGPU::V_LSHRREV_B32_e32_vi: 3885 case AMDGPU::V_LSHRREV_B32_e64_vi: 3886 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3887 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3888 3889 case AMDGPU::V_ASHRREV_I32_e32: 3890 case AMDGPU::V_ASHRREV_I32_e64: 3891 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3892 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3893 case AMDGPU::V_ASHRREV_I32_e32_vi: 3894 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3895 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3896 case AMDGPU::V_ASHRREV_I32_e64_vi: 3897 3898 case AMDGPU::V_LSHLREV_B32_e32: 3899 case AMDGPU::V_LSHLREV_B32_e64: 3900 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3901 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3902 case AMDGPU::V_LSHLREV_B32_e32_vi: 3903 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3904 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3905 case AMDGPU::V_LSHLREV_B32_e64_vi: 3906 3907 case AMDGPU::V_LSHLREV_B16_e32: 3908 case AMDGPU::V_LSHLREV_B16_e64: 3909 case AMDGPU::V_LSHLREV_B16_e32_vi: 3910 case AMDGPU::V_LSHLREV_B16_e64_vi: 3911 case AMDGPU::V_LSHLREV_B16_gfx10: 3912 3913 case AMDGPU::V_LSHRREV_B16_e32: 3914 case AMDGPU::V_LSHRREV_B16_e64: 3915 case AMDGPU::V_LSHRREV_B16_e32_vi: 3916 case AMDGPU::V_LSHRREV_B16_e64_vi: 3917 case AMDGPU::V_LSHRREV_B16_gfx10: 3918 3919 case AMDGPU::V_ASHRREV_I16_e32: 3920 case AMDGPU::V_ASHRREV_I16_e64: 3921 case AMDGPU::V_ASHRREV_I16_e32_vi: 3922 case AMDGPU::V_ASHRREV_I16_e64_vi: 3923 case AMDGPU::V_ASHRREV_I16_gfx10: 3924 3925 case AMDGPU::V_LSHLREV_B64_e64: 3926 case AMDGPU::V_LSHLREV_B64_gfx10: 3927 case AMDGPU::V_LSHLREV_B64_vi: 3928 3929 case AMDGPU::V_LSHRREV_B64_e64: 3930 case AMDGPU::V_LSHRREV_B64_gfx10: 3931 case AMDGPU::V_LSHRREV_B64_vi: 3932 3933 case AMDGPU::V_ASHRREV_I64_e64: 3934 case AMDGPU::V_ASHRREV_I64_gfx10: 3935 case AMDGPU::V_ASHRREV_I64_vi: 3936 3937 case AMDGPU::V_PK_LSHLREV_B16: 3938 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3939 case AMDGPU::V_PK_LSHLREV_B16_vi: 3940 3941 case AMDGPU::V_PK_LSHRREV_B16: 3942 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3943 case AMDGPU::V_PK_LSHRREV_B16_vi: 3944 case AMDGPU::V_PK_ASHRREV_I16: 3945 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3946 case AMDGPU::V_PK_ASHRREV_I16_vi: 3947 return true; 3948 default: 3949 return false; 3950 } 3951 } 3952 3953 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3954 3955 using namespace SIInstrFlags; 3956 const unsigned Opcode = Inst.getOpcode(); 3957 const MCInstrDesc &Desc = MII.get(Opcode); 3958 3959 // lds_direct register is defined so that it can be used 3960 // with 9-bit operands only. Ignore encodings which do not accept these. 
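// For example (illustrative): "v_mov_b32 v0, lds_direct" is accepted on targets
// that provide lds_direct, while using lds_direct as src1/src2, with an SDWA
// encoding, or with a *rev opcode is diagnosed by the checks below.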
3961 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3962 if ((Desc.TSFlags & Enc) == 0) 3963 return None; 3964 3965 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3966 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3967 if (SrcIdx == -1) 3968 break; 3969 const auto &Src = Inst.getOperand(SrcIdx); 3970 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3971 3972 if (isGFX90A()) 3973 return StringRef("lds_direct is not supported on this GPU"); 3974 3975 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3976 return StringRef("lds_direct cannot be used with this instruction"); 3977 3978 if (SrcName != OpName::src0) 3979 return StringRef("lds_direct may be used as src0 only"); 3980 } 3981 } 3982 3983 return None; 3984 } 3985 3986 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3987 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3988 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3989 if (Op.isFlatOffset()) 3990 return Op.getStartLoc(); 3991 } 3992 return getLoc(); 3993 } 3994 3995 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3996 const OperandVector &Operands) { 3997 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3998 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3999 return true; 4000 4001 auto Opcode = Inst.getOpcode(); 4002 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4003 assert(OpNum != -1); 4004 4005 const auto &Op = Inst.getOperand(OpNum); 4006 if (!hasFlatOffsets() && Op.getImm() != 0) { 4007 Error(getFlatOffsetLoc(Operands), 4008 "flat offset modifier is not supported on this GPU"); 4009 return false; 4010 } 4011 4012 // For FLAT segment the offset must be positive; 4013 // MSB is ignored and forced to zero. 4014 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4015 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4016 if (!isIntN(OffsetSize, Op.getImm())) { 4017 Error(getFlatOffsetLoc(Operands), 4018 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4019 return false; 4020 } 4021 } else { 4022 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4023 if (!isUIntN(OffsetSize, Op.getImm())) { 4024 Error(getFlatOffsetLoc(Operands), 4025 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4026 return false; 4027 } 4028 } 4029 4030 return true; 4031 } 4032 4033 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4034 // Start with second operand because SMEM Offset cannot be dst or src0. 
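// E.g. in "s_load_dword s4, s[2:3], 0x10" (illustrative), Operands[0] is the
// mnemonic token and Operands[1] is the destination, so the offset can only
// appear at index 2 or later and the scan below starts there.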
4035 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4036 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4037 if (Op.isSMEMOffset()) 4038 return Op.getStartLoc(); 4039 } 4040 return getLoc(); 4041 } 4042 4043 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4044 const OperandVector &Operands) { 4045 if (isCI() || isSI()) 4046 return true; 4047 4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4049 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4050 return true; 4051 4052 auto Opcode = Inst.getOpcode(); 4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4054 if (OpNum == -1) 4055 return true; 4056 4057 const auto &Op = Inst.getOperand(OpNum); 4058 if (!Op.isImm()) 4059 return true; 4060 4061 uint64_t Offset = Op.getImm(); 4062 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4063 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4064 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4065 return true; 4066 4067 Error(getSMEMOffsetLoc(Operands), 4068 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4069 "expected a 21-bit signed offset"); 4070 4071 return false; 4072 } 4073 4074 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4075 unsigned Opcode = Inst.getOpcode(); 4076 const MCInstrDesc &Desc = MII.get(Opcode); 4077 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4078 return true; 4079 4080 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4081 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4082 4083 const int OpIndices[] = { Src0Idx, Src1Idx }; 4084 4085 unsigned NumExprs = 0; 4086 unsigned NumLiterals = 0; 4087 uint32_t LiteralValue; 4088 4089 for (int OpIdx : OpIndices) { 4090 if (OpIdx == -1) break; 4091 4092 const MCOperand &MO = Inst.getOperand(OpIdx); 4093 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4094 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4095 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4096 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4097 if (NumLiterals == 0 || LiteralValue != Value) { 4098 LiteralValue = Value; 4099 ++NumLiterals; 4100 } 4101 } else if (MO.isExpr()) { 4102 ++NumExprs; 4103 } 4104 } 4105 } 4106 4107 return NumLiterals + NumExprs <= 1; 4108 } 4109 4110 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4111 const unsigned Opc = Inst.getOpcode(); 4112 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4113 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4114 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4115 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4116 4117 if (OpSel & ~3) 4118 return false; 4119 } 4120 4121 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4122 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4123 if (OpSelIdx != -1) { 4124 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4125 return false; 4126 } 4127 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4128 if (OpSelHiIdx != -1) { 4129 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4130 return false; 4131 } 4132 } 4133 4134 return true; 4135 } 4136 4137 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4138 const OperandVector &Operands) { 4139 const unsigned Opc = Inst.getOpcode(); 4140 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4141 if (DppCtrlIdx < 0) 4142 return true; 4143 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4144 4145 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4146 // DPP64 is supported for row_newbcast only. 4147 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4148 if (Src0Idx >= 0 && 4149 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4150 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4151 Error(S, "64 bit dpp only supports row_newbcast"); 4152 return false; 4153 } 4154 } 4155 4156 return true; 4157 } 4158 4159 // Check if VCC register matches wavefront size 4160 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4161 auto FB = getFeatureBits(); 4162 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4163 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4164 } 4165 4166 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4167 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4168 const OperandVector &Operands) { 4169 unsigned Opcode = Inst.getOpcode(); 4170 const MCInstrDesc &Desc = MII.get(Opcode); 4171 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4172 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4173 ImmIdx == -1) 4174 return true; 4175 4176 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4177 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4178 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4179 4180 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4181 4182 unsigned NumExprs = 0; 4183 unsigned NumLiterals = 0; 4184 uint32_t LiteralValue; 4185 4186 for (int OpIdx : OpIndices) { 4187 if (OpIdx == -1) 4188 continue; 4189 4190 const MCOperand &MO = Inst.getOperand(OpIdx); 4191 if (!MO.isImm() && !MO.isExpr()) 4192 continue; 4193 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4194 continue; 4195 4196 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4197 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4198 Error(getConstLoc(Operands), 4199 "inline constants are not allowed for this operand"); 4200 return false; 4201 } 4202 4203 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4204 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4205 if (NumLiterals == 0 || LiteralValue != Value) { 4206 LiteralValue = Value; 4207 ++NumLiterals; 4208 } 4209 } else if (MO.isExpr()) { 4210 ++NumExprs; 4211 } 4212 } 4213 NumLiterals += NumExprs; 4214 4215 if (!NumLiterals) 4216 return true; 4217 4218 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4219 Error(getLitLoc(Operands), "literal operands are not supported"); 4220 return false; 4221 } 4222 4223 if (NumLiterals > 1) { 4224 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4225 return false; 4226 } 4227 4228 return true; 4229 } 4230 4231 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4232 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4233 const MCRegisterInfo *MRI) { 4234 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4235 if (OpIdx < 0) 4236 return -1; 4237 4238 const MCOperand &Op = Inst.getOperand(OpIdx); 4239 if (!Op.isReg()) 4240 return -1; 4241 4242 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4243 auto Reg = Sub ? Sub : Op.getReg(); 4244 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4245 return AGPR32.contains(Reg) ? 
1 : 0; 4246 } 4247 4248 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4249 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4250 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4251 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4252 SIInstrFlags::DS)) == 0) 4253 return true; 4254 4255 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4256 : AMDGPU::OpName::vdata; 4257 4258 const MCRegisterInfo *MRI = getMRI(); 4259 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4260 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4261 4262 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4263 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4264 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4265 return false; 4266 } 4267 4268 auto FB = getFeatureBits(); 4269 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4270 if (DataAreg < 0 || DstAreg < 0) 4271 return true; 4272 return DstAreg == DataAreg; 4273 } 4274 4275 return DstAreg < 1 && DataAreg < 1; 4276 } 4277 4278 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4279 auto FB = getFeatureBits(); 4280 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4281 return true; 4282 4283 const MCRegisterInfo *MRI = getMRI(); 4284 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4285 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4286 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4287 const MCOperand &Op = Inst.getOperand(I); 4288 if (!Op.isReg()) 4289 continue; 4290 4291 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4292 if (!Sub) 4293 continue; 4294 4295 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4296 return false; 4297 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4298 return false; 4299 } 4300 4301 return true; 4302 } 4303 4304 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4305 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4307 if (Op.isBLGP()) 4308 return Op.getStartLoc(); 4309 } 4310 return SMLoc(); 4311 } 4312 4313 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4314 const OperandVector &Operands) { 4315 unsigned Opc = Inst.getOpcode(); 4316 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4317 if (BlgpIdx == -1) 4318 return true; 4319 SMLoc BLGPLoc = getBLGPLoc(Operands); 4320 if (!BLGPLoc.isValid()) 4321 return true; 4322 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4323 auto FB = getFeatureBits(); 4324 bool UsesNeg = false; 4325 if (FB[AMDGPU::FeatureGFX940Insts]) { 4326 switch (Opc) { 4327 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4328 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4329 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4330 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4331 UsesNeg = true; 4332 } 4333 } 4334 4335 if (IsNeg == UsesNeg) 4336 return true; 4337 4338 Error(BLGPLoc, 4339 UsesNeg ? "invalid modifier: blgp is not supported" 4340 : "invalid modifier: neg is not supported"); 4341 4342 return false; 4343 } 4344 4345 // gfx90a has an undocumented limitation: 4346 // DS_GWS opcodes must use even aligned registers. 
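// For example (illustrative): on gfx90a "ds_gws_init v2 gds" uses an even
// VGPR and is accepted, while "ds_gws_init v3 gds" would be rejected by the
// check below because v3 has an odd register index.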
4347 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4348 const OperandVector &Operands) { 4349 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4350 return true; 4351 4352 int Opc = Inst.getOpcode(); 4353 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4354 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4355 return true; 4356 4357 const MCRegisterInfo *MRI = getMRI(); 4358 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4359 int Data0Pos = 4360 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4361 assert(Data0Pos != -1); 4362 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4363 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4364 if (RegIdx & 1) { 4365 SMLoc RegLoc = getRegLoc(Reg, Operands); 4366 Error(RegLoc, "vgpr must be even aligned"); 4367 return false; 4368 } 4369 4370 return true; 4371 } 4372 4373 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4374 const OperandVector &Operands, 4375 const SMLoc &IDLoc) { 4376 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4377 AMDGPU::OpName::cpol); 4378 if (CPolPos == -1) 4379 return true; 4380 4381 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4382 4383 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4384 if (TSFlags & SIInstrFlags::SMRD) { 4385 if (CPol && (isSI() || isCI())) { 4386 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4387 Error(S, "cache policy is not supported for SMRD instructions"); 4388 return false; 4389 } 4390 if (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC)) { 4391 Error(IDLoc, "invalid cache policy for SMEM instruction"); 4392 return false; 4393 } 4394 } 4395 4396 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4397 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4398 StringRef CStr(S.getPointer()); 4399 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4400 Error(S, "scc is not supported on this GPU"); 4401 return false; 4402 } 4403 4404 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4405 return true; 4406 4407 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4408 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4409 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4410 : "instruction must use glc"); 4411 return false; 4412 } 4413 } else { 4414 if (CPol & CPol::GLC) { 4415 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4416 StringRef CStr(S.getPointer()); 4417 S = SMLoc::getFromPointer( 4418 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4419 Error(S, isGFX940() ? "instruction must not use sc0" 4420 : "instruction must not use glc"); 4421 return false; 4422 } 4423 } 4424 4425 return true; 4426 } 4427 4428 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4429 const OperandVector &Operands, 4430 const SMLoc &IDLoc) { 4431 if (isGFX940()) 4432 return true; 4433 4434 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4435 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4436 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4437 return true; 4438 // This is FLAT LDS DMA. 4439 4440 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4441 StringRef CStr(S.getPointer()); 4442 if (!CStr.startswith("lds")) { 4443 // This is an incorrectly selected LDS DMA version of a FLAT load opcode. 4444 // The LDS version should have the 'lds' modifier, but it follows optional 4445 // operands, so its absence is ignored by the matcher.
4446 Error(IDLoc, "invalid operands for instruction"); 4447 return false; 4448 } 4449 4450 return true; 4451 } 4452 4453 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4454 const SMLoc &IDLoc, 4455 const OperandVector &Operands) { 4456 if (auto ErrMsg = validateLdsDirect(Inst)) { 4457 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4458 return false; 4459 } 4460 if (!validateSOPLiteral(Inst)) { 4461 Error(getLitLoc(Operands), 4462 "only one literal operand is allowed"); 4463 return false; 4464 } 4465 if (!validateVOPLiteral(Inst, Operands)) { 4466 return false; 4467 } 4468 if (!validateConstantBusLimitations(Inst, Operands)) { 4469 return false; 4470 } 4471 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4472 return false; 4473 } 4474 if (!validateIntClampSupported(Inst)) { 4475 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4476 "integer clamping is not supported on this GPU"); 4477 return false; 4478 } 4479 if (!validateOpSel(Inst)) { 4480 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4481 "invalid op_sel operand"); 4482 return false; 4483 } 4484 if (!validateDPP(Inst, Operands)) { 4485 return false; 4486 } 4487 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4488 if (!validateMIMGD16(Inst)) { 4489 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4490 "d16 modifier is not supported on this GPU"); 4491 return false; 4492 } 4493 if (!validateMIMGDim(Inst)) { 4494 Error(IDLoc, "dim modifier is required on this GPU"); 4495 return false; 4496 } 4497 if (!validateMIMGMSAA(Inst)) { 4498 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4499 "invalid dim; must be MSAA type"); 4500 return false; 4501 } 4502 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4503 Error(IDLoc, *ErrMsg); 4504 return false; 4505 } 4506 if (!validateMIMGAddrSize(Inst)) { 4507 Error(IDLoc, 4508 "image address size does not match dim and a16"); 4509 return false; 4510 } 4511 if (!validateMIMGAtomicDMask(Inst)) { 4512 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4513 "invalid atomic image dmask"); 4514 return false; 4515 } 4516 if (!validateMIMGGatherDMask(Inst)) { 4517 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4518 "invalid image_gather dmask: only one bit must be set"); 4519 return false; 4520 } 4521 if (!validateMovrels(Inst, Operands)) { 4522 return false; 4523 } 4524 if (!validateFlatOffset(Inst, Operands)) { 4525 return false; 4526 } 4527 if (!validateSMEMOffset(Inst, Operands)) { 4528 return false; 4529 } 4530 if (!validateMAIAccWrite(Inst, Operands)) { 4531 return false; 4532 } 4533 if (!validateMFMA(Inst, Operands)) { 4534 return false; 4535 } 4536 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4537 return false; 4538 } 4539 4540 if (!validateAGPRLdSt(Inst)) { 4541 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4542 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4543 : "invalid register class: agpr loads and stores not supported on this GPU" 4544 ); 4545 return false; 4546 } 4547 if (!validateVGPRAlign(Inst)) { 4548 Error(IDLoc, 4549 "invalid register class: vgpr tuples must be 64 bit aligned"); 4550 return false; 4551 } 4552 if (!validateGWS(Inst, Operands)) { 4553 return false; 4554 } 4555 4556 if (!validateBLGP(Inst, Operands)) { 4557 return false; 4558 } 4559 4560 if (!validateDivScale(Inst)) { 4561 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4562 return false; 4563 } 4564 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4565 return false; 4566 } 4567 4568 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4569 return false; 4570 } 4571 4572 return true; 4573 } 4574 4575 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4576 const FeatureBitset &FBS, 4577 unsigned VariantID = 0); 4578 4579 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4580 const FeatureBitset &AvailableFeatures, 4581 unsigned VariantID); 4582 4583 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4584 const FeatureBitset &FBS) { 4585 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4586 } 4587 4588 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4589 const FeatureBitset &FBS, 4590 ArrayRef<unsigned> Variants) { 4591 for (auto Variant : Variants) { 4592 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4593 return true; 4594 } 4595 4596 return false; 4597 } 4598 4599 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4600 const SMLoc &IDLoc) { 4601 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4602 4603 // Check if requested instruction variant is supported. 4604 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4605 return false; 4606 4607 // This instruction is not supported. 4608 // Clear any other pending errors because they are no longer relevant. 4609 getParser().clearPendingErrors(); 4610 4611 // Requested instruction variant is not supported. 4612 // Check if any other variants are supported. 4613 StringRef VariantName = getMatchedVariantName(); 4614 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4615 return Error(IDLoc, 4616 Twine(VariantName, 4617 " variant of this instruction is not supported")); 4618 } 4619 4620 // Finally check if this instruction is supported on any other GPU. 4621 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4622 return Error(IDLoc, "instruction not supported on this GPU"); 4623 } 4624 4625 // Instruction not supported on any GPU. Probably a typo. 4626 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4627 return Error(IDLoc, "invalid instruction" + Suggestion); 4628 } 4629 4630 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4631 OperandVector &Operands, 4632 MCStreamer &Out, 4633 uint64_t &ErrorInfo, 4634 bool MatchingInlineAsm) { 4635 MCInst Inst; 4636 unsigned Result = Match_Success; 4637 for (auto Variant : getMatchedVariants()) { 4638 uint64_t EI; 4639 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4640 Variant); 4641 // We order match statuses from least to most specific. 
We use the most specific 4642 // status as the result: 4643 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4644 if ((R == Match_Success) || 4645 (R == Match_PreferE32) || 4646 (R == Match_MissingFeature && Result != Match_PreferE32) || 4647 (R == Match_InvalidOperand && Result != Match_MissingFeature 4648 && Result != Match_PreferE32) || 4649 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4650 && Result != Match_MissingFeature 4651 && Result != Match_PreferE32)) { 4652 Result = R; 4653 ErrorInfo = EI; 4654 } 4655 if (R == Match_Success) 4656 break; 4657 } 4658 4659 if (Result == Match_Success) { 4660 if (!validateInstruction(Inst, IDLoc, Operands)) { 4661 return true; 4662 } 4663 Inst.setLoc(IDLoc); 4664 Out.emitInstruction(Inst, getSTI()); 4665 return false; 4666 } 4667 4668 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4669 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4670 return true; 4671 } 4672 4673 switch (Result) { 4674 default: break; 4675 case Match_MissingFeature: 4676 // It has been verified that the specified instruction 4677 // mnemonic is valid. A match was found but it requires 4678 // features which are not supported on this GPU. 4679 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4680 4681 case Match_InvalidOperand: { 4682 SMLoc ErrorLoc = IDLoc; 4683 if (ErrorInfo != ~0ULL) { 4684 if (ErrorInfo >= Operands.size()) { 4685 return Error(IDLoc, "too few operands for instruction"); 4686 } 4687 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4688 if (ErrorLoc == SMLoc()) 4689 ErrorLoc = IDLoc; 4690 } 4691 return Error(ErrorLoc, "invalid operand for instruction"); 4692 } 4693 4694 case Match_PreferE32: 4695 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4696 "should be encoded as e32"); 4697 case Match_MnemonicFail: 4698 llvm_unreachable("Invalid instructions should have been handled already"); 4699 } 4700 llvm_unreachable("Implement any new match types added!"); 4701 } 4702 4703 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4704 int64_t Tmp = -1; 4705 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4706 return true; 4707 } 4708 if (getParser().parseAbsoluteExpression(Tmp)) { 4709 return true; 4710 } 4711 Ret = static_cast<uint32_t>(Tmp); 4712 return false; 4713 } 4714 4715 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4716 uint32_t &Minor) { 4717 if (ParseAsAbsoluteExpression(Major)) 4718 return TokError("invalid major version"); 4719 4720 if (!trySkipToken(AsmToken::Comma)) 4721 return TokError("minor version number required, comma expected"); 4722 4723 if (ParseAsAbsoluteExpression(Minor)) 4724 return TokError("invalid minor version"); 4725 4726 return false; 4727 } 4728 4729 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4730 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4731 return TokError("directive only supported for amdgcn architecture"); 4732 4733 std::string TargetIDDirective; 4734 SMLoc TargetStart = getTok().getLoc(); 4735 if (getParser().parseEscapedString(TargetIDDirective)) 4736 return true; 4737 4738 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4739 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4740 return getParser().Error(TargetRange.Start, 4741 (Twine(".amdgcn_target directive's target id ") + 4742 Twine(TargetIDDirective) + 4743 Twine(" does not match the specified target id ") + 4744
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4745 4746 return false; 4747 } 4748 4749 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4750 return Error(Range.Start, "value out of range", Range); 4751 } 4752 4753 bool AMDGPUAsmParser::calculateGPRBlocks( 4754 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4755 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4756 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4757 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4758 // TODO(scott.linder): These calculations are duplicated from 4759 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4760 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4761 4762 unsigned NumVGPRs = NextFreeVGPR; 4763 unsigned NumSGPRs = NextFreeSGPR; 4764 4765 if (Version.Major >= 10) 4766 NumSGPRs = 0; 4767 else { 4768 unsigned MaxAddressableNumSGPRs = 4769 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4770 4771 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4772 NumSGPRs > MaxAddressableNumSGPRs) 4773 return OutOfRangeError(SGPRRange); 4774 4775 NumSGPRs += 4776 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4777 4778 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4779 NumSGPRs > MaxAddressableNumSGPRs) 4780 return OutOfRangeError(SGPRRange); 4781 4782 if (Features.test(FeatureSGPRInitBug)) 4783 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4784 } 4785 4786 VGPRBlocks = 4787 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4788 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4789 4790 return false; 4791 } 4792 4793 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4794 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4795 return TokError("directive only supported for amdgcn architecture"); 4796 4797 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4798 return TokError("directive only supported for amdhsa OS"); 4799 4800 StringRef KernelName; 4801 if (getParser().parseIdentifier(KernelName)) 4802 return true; 4803 4804 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4805 4806 StringSet<> Seen; 4807 4808 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4809 4810 SMRange VGPRRange; 4811 uint64_t NextFreeVGPR = 0; 4812 uint64_t AccumOffset = 0; 4813 uint64_t SharedVGPRCount = 0; 4814 SMRange SGPRRange; 4815 uint64_t NextFreeSGPR = 0; 4816 4817 // Count the number of user SGPRs implied from the enabled feature bits. 4818 unsigned ImpliedUserSGPRCount = 0; 4819 4820 // Track if the asm explicitly contains the directive for the user SGPR 4821 // count. 
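// (Illustrative) a kernel may state ".amdhsa_user_sgpr_count 6" explicitly;
// otherwise the count is derived from the implied user SGPR directives parsed
// below, and an explicit value is checked against the implied count later.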
4822 Optional<unsigned> ExplicitUserSGPRCount; 4823 bool ReserveVCC = true; 4824 bool ReserveFlatScr = true; 4825 Optional<bool> EnableWavefrontSize32; 4826 4827 while (true) { 4828 while (trySkipToken(AsmToken::EndOfStatement)); 4829 4830 StringRef ID; 4831 SMRange IDRange = getTok().getLocRange(); 4832 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4833 return true; 4834 4835 if (ID == ".end_amdhsa_kernel") 4836 break; 4837 4838 if (Seen.find(ID) != Seen.end()) 4839 return TokError(".amdhsa_ directives cannot be repeated"); 4840 Seen.insert(ID); 4841 4842 SMLoc ValStart = getLoc(); 4843 int64_t IVal; 4844 if (getParser().parseAbsoluteExpression(IVal)) 4845 return true; 4846 SMLoc ValEnd = getLoc(); 4847 SMRange ValRange = SMRange(ValStart, ValEnd); 4848 4849 if (IVal < 0) 4850 return OutOfRangeError(ValRange); 4851 4852 uint64_t Val = IVal; 4853 4854 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4855 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4856 return OutOfRangeError(RANGE); \ 4857 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4858 4859 if (ID == ".amdhsa_group_segment_fixed_size") { 4860 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4861 return OutOfRangeError(ValRange); 4862 KD.group_segment_fixed_size = Val; 4863 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4864 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4865 return OutOfRangeError(ValRange); 4866 KD.private_segment_fixed_size = Val; 4867 } else if (ID == ".amdhsa_kernarg_size") { 4868 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4869 return OutOfRangeError(ValRange); 4870 KD.kernarg_size = Val; 4871 } else if (ID == ".amdhsa_user_sgpr_count") { 4872 ExplicitUserSGPRCount = Val; 4873 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4874 if (hasArchitectedFlatScratch()) 4875 return Error(IDRange.Start, 4876 "directive is not supported with architected flat scratch", 4877 IDRange); 4878 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4879 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4880 Val, ValRange); 4881 if (Val) 4882 ImpliedUserSGPRCount += 4; 4883 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4884 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4885 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4886 ValRange); 4887 if (Val) 4888 ImpliedUserSGPRCount += 2; 4889 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4890 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4891 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4892 ValRange); 4893 if (Val) 4894 ImpliedUserSGPRCount += 2; 4895 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4896 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4897 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4898 Val, ValRange); 4899 if (Val) 4900 ImpliedUserSGPRCount += 2; 4901 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4902 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4903 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4904 ValRange); 4905 if (Val) 4906 ImpliedUserSGPRCount += 2; 4907 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4908 if (hasArchitectedFlatScratch()) 4909 return Error(IDRange.Start, 4910 "directive is not supported with architected flat scratch", 4911 IDRange); 4912 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4913 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4914 ValRange); 4915 if (Val) 4916 ImpliedUserSGPRCount += 2; 4917 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4918 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4919 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4920 Val, ValRange); 4921 if (Val) 4922 ImpliedUserSGPRCount += 1; 4923 } else if (ID == ".amdhsa_wavefront_size32") { 4924 if (IVersion.Major < 10) 4925 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4926 EnableWavefrontSize32 = Val; 4927 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4928 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4929 Val, ValRange); 4930 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4931 if (hasArchitectedFlatScratch()) 4932 return Error(IDRange.Start, 4933 "directive is not supported with architected flat scratch", 4934 IDRange); 4935 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4936 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4937 } else if (ID == ".amdhsa_enable_private_segment") { 4938 if (!hasArchitectedFlatScratch()) 4939 return Error( 4940 IDRange.Start, 4941 "directive is not supported without architected flat scratch", 4942 IDRange); 4943 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4944 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4945 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4946 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4947 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4948 ValRange); 4949 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4950 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4951 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4952 ValRange); 4953 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4954 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4955 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4956 ValRange); 4957 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4959 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4960 ValRange); 4961 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4963 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4964 ValRange); 4965 } else if (ID == ".amdhsa_next_free_vgpr") { 4966 VGPRRange = ValRange; 4967 NextFreeVGPR = Val; 4968 } else if (ID == ".amdhsa_next_free_sgpr") { 4969 SGPRRange = ValRange; 4970 NextFreeSGPR = Val; 4971 } else if (ID == ".amdhsa_accum_offset") { 4972 if (!isGFX90A()) 4973 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4974 AccumOffset = Val; 4975 } else if (ID == ".amdhsa_reserve_vcc") { 4976 if (!isUInt<1>(Val)) 4977 return OutOfRangeError(ValRange); 4978 ReserveVCC = Val; 4979 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4980 if (IVersion.Major < 7) 4981 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4982 if (hasArchitectedFlatScratch()) 4983 return Error(IDRange.Start, 4984 "directive is not supported with architected flat scratch", 4985 IDRange); 4986 if (!isUInt<1>(Val)) 4987 return OutOfRangeError(ValRange); 4988 ReserveFlatScr = Val; 4989 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4990 if (IVersion.Major < 8) 4991 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4992 if (!isUInt<1>(Val)) 4993 return OutOfRangeError(ValRange); 4994 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4995 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4996 IDRange); 4997 } else if (ID == ".amdhsa_float_round_mode_32") { 4998 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4999 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 5000 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
5001 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5002 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 5003 } else if (ID == ".amdhsa_float_denorm_mode_32") { 5004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5005 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5006 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5007 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5008 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5009 ValRange); 5010 } else if (ID == ".amdhsa_dx10_clamp") { 5011 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5012 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5013 } else if (ID == ".amdhsa_ieee_mode") { 5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5015 Val, ValRange); 5016 } else if (ID == ".amdhsa_fp16_overflow") { 5017 if (IVersion.Major < 9) 5018 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5019 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5020 ValRange); 5021 } else if (ID == ".amdhsa_tg_split") { 5022 if (!isGFX90A()) 5023 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5024 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5025 ValRange); 5026 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5027 if (IVersion.Major < 10) 5028 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5029 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5030 ValRange); 5031 } else if (ID == ".amdhsa_memory_ordered") { 5032 if (IVersion.Major < 10) 5033 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5034 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5035 ValRange); 5036 } else if (ID == ".amdhsa_forward_progress") { 5037 if (IVersion.Major < 10) 5038 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5040 ValRange); 5041 } else if (ID == ".amdhsa_shared_vgpr_count") { 5042 if (IVersion.Major < 10) 5043 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5044 SharedVGPRCount = Val; 5045 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5046 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5047 ValRange); 5048 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5049 PARSE_BITS_ENTRY( 5050 KD.compute_pgm_rsrc2, 5051 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5052 ValRange); 5053 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5054 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5055 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5056 Val, ValRange); 5057 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5058 PARSE_BITS_ENTRY( 5059 KD.compute_pgm_rsrc2, 5060 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5061 ValRange); 5062 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5063 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5064 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5065 Val, ValRange); 5066 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5067 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5068 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5069 Val, ValRange); 5070 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5071 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5072 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5073 Val, ValRange); 5074 } else if (ID == ".amdhsa_exception_int_div_zero") { 5075 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5076 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5077 Val, ValRange); 5078 } else { 5079 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5080 } 5081 5082 #undef PARSE_BITS_ENTRY 5083 } 5084 5085 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5086 return TokError(".amdhsa_next_free_vgpr directive is required"); 5087 5088 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5089 return TokError(".amdhsa_next_free_sgpr directive is required"); 5090 5091 unsigned VGPRBlocks; 5092 unsigned SGPRBlocks; 5093 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5094 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5095 EnableWavefrontSize32, NextFreeVGPR, 5096 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5097 SGPRBlocks)) 5098 return true; 5099 5100 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5101 VGPRBlocks)) 5102 return OutOfRangeError(VGPRRange); 5103 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5104 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5105 5106 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5107 SGPRBlocks)) 5108 return OutOfRangeError(SGPRRange); 5109 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5110 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5111 SGPRBlocks); 5112 5113 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5114 return TokError("amdhsa_user_sgpr_count smaller than implied by " 5115 "enabled user SGPRs"); 5116 5117 unsigned UserSGPRCount = 5118 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5119 5120 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5121 return TokError("too many user SGPRs enabled"); 5122 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5123 UserSGPRCount); 5124 5125 if (isGFX90A()) { 5126 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5127 return TokError(".amdhsa_accum_offset directive is required"); 5128 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5129 return TokError("accum_offset should be in range [4..256] in " 5130 "increments of 4"); 5131 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5132 return TokError("accum_offset exceeds total VGPR allocation"); 5133 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5134 (AccumOffset / 4 - 1)); 5135 } 5136 5137 if (IVersion.Major == 10) { 5138 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5139 if (SharedVGPRCount && EnableWavefrontSize32) { 5140 return TokError("shared_vgpr_count directive not valid on " 5141 "wavefront size 32"); 5142 } 5143 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5144 return TokError("shared_vgpr_count*2 + " 5145 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5146 "exceed 63\n"); 5147 } 5148 } 5149 5150 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5151 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5152 ReserveFlatScr); 5153 return false; 5154 } 5155 5156 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5157 uint32_t Major; 5158 uint32_t Minor; 5159 5160 if (ParseDirectiveMajorMinor(Major, Minor)) 5161 return true; 5162 5163 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5164 return false; 5165 } 5166 5167 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5168 uint32_t Major; 5169 uint32_t Minor; 5170 uint32_t Stepping; 5171 StringRef VendorName; 5172 StringRef ArchName; 5173 5174 // If this directive has no
arguments, then use the ISA version for the 5175 // targeted GPU. 5176 if (isToken(AsmToken::EndOfStatement)) { 5177 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5178 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5179 ISA.Stepping, 5180 "AMD", "AMDGPU"); 5181 return false; 5182 } 5183 5184 if (ParseDirectiveMajorMinor(Major, Minor)) 5185 return true; 5186 5187 if (!trySkipToken(AsmToken::Comma)) 5188 return TokError("stepping version number required, comma expected"); 5189 5190 if (ParseAsAbsoluteExpression(Stepping)) 5191 return TokError("invalid stepping version"); 5192 5193 if (!trySkipToken(AsmToken::Comma)) 5194 return TokError("vendor name required, comma expected"); 5195 5196 if (!parseString(VendorName, "invalid vendor name")) 5197 return true; 5198 5199 if (!trySkipToken(AsmToken::Comma)) 5200 return TokError("arch name required, comma expected"); 5201 5202 if (!parseString(ArchName, "invalid arch name")) 5203 return true; 5204 5205 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5206 VendorName, ArchName); 5207 return false; 5208 } 5209 5210 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5211 amd_kernel_code_t &Header) { 5212 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5213 // assembly for backwards compatibility. 5214 if (ID == "max_scratch_backing_memory_byte_size") { 5215 Parser.eatToEndOfStatement(); 5216 return false; 5217 } 5218 5219 SmallString<40> ErrStr; 5220 raw_svector_ostream Err(ErrStr); 5221 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5222 return TokError(Err.str()); 5223 } 5224 Lex(); 5225 5226 if (ID == "enable_wavefront_size32") { 5227 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5228 if (!isGFX10Plus()) 5229 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5230 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5231 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5232 } else { 5233 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5234 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5235 } 5236 } 5237 5238 if (ID == "wavefront_size") { 5239 if (Header.wavefront_size == 5) { 5240 if (!isGFX10Plus()) 5241 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5242 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5243 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5244 } else if (Header.wavefront_size == 6) { 5245 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5246 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5247 } 5248 } 5249 5250 if (ID == "enable_wgp_mode") { 5251 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5252 !isGFX10Plus()) 5253 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5254 } 5255 5256 if (ID == "enable_mem_ordered") { 5257 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5258 !isGFX10Plus()) 5259 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5260 } 5261 5262 if (ID == "enable_fwd_progress") { 5263 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5264 !isGFX10Plus()) 5265 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5266 } 5267 5268 return false; 5269 } 5270 5271 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5272 amd_kernel_code_t Header; 5273 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5274 5275 while (true) { 
5276 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5277 // will set the current token to EndOfStatement. 5278 while(trySkipToken(AsmToken::EndOfStatement)); 5279 5280 StringRef ID; 5281 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5282 return true; 5283 5284 if (ID == ".end_amd_kernel_code_t") 5285 break; 5286 5287 if (ParseAMDKernelCodeTValue(ID, Header)) 5288 return true; 5289 } 5290 5291 getTargetStreamer().EmitAMDKernelCodeT(Header); 5292 5293 return false; 5294 } 5295 5296 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5297 StringRef KernelName; 5298 if (!parseId(KernelName, "expected symbol name")) 5299 return true; 5300 5301 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5302 ELF::STT_AMDGPU_HSA_KERNEL); 5303 5304 KernelScope.initialize(getContext()); 5305 return false; 5306 } 5307 5308 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5309 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5310 return Error(getLoc(), 5311 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5312 "architectures"); 5313 } 5314 5315 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5316 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5317 return Error(getParser().getTok().getLoc(), "target id must match options"); 5318 5319 getTargetStreamer().EmitISAVersion(); 5320 Lex(); 5321 5322 return false; 5323 } 5324 5325 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5326 const char *AssemblerDirectiveBegin; 5327 const char *AssemblerDirectiveEnd; 5328 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5329 isHsaAbiVersion3AndAbove(&getSTI()) 5330 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5331 HSAMD::V3::AssemblerDirectiveEnd) 5332 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5333 HSAMD::AssemblerDirectiveEnd); 5334 5335 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5336 return Error(getLoc(), 5337 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5338 "not available on non-amdhsa OSes")).str()); 5339 } 5340 5341 std::string HSAMetadataString; 5342 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5343 HSAMetadataString)) 5344 return true; 5345 5346 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5347 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5348 return Error(getLoc(), "invalid HSA metadata"); 5349 } else { 5350 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5351 return Error(getLoc(), "invalid HSA metadata"); 5352 } 5353 5354 return false; 5355 } 5356 5357 /// Common code to parse out a block of text (typically YAML) between start and 5358 /// end directives. 
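/// A typical use is the HSA metadata block, e.g. (illustrative) the text
/// between .amdgpu_metadata and .end_amdgpu_metadata.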
5359 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5360 const char *AssemblerDirectiveEnd, 5361 std::string &CollectString) { 5362 5363 raw_string_ostream CollectStream(CollectString); 5364 5365 getLexer().setSkipSpace(false); 5366 5367 bool FoundEnd = false; 5368 while (!isToken(AsmToken::Eof)) { 5369 while (isToken(AsmToken::Space)) { 5370 CollectStream << getTokenStr(); 5371 Lex(); 5372 } 5373 5374 if (trySkipId(AssemblerDirectiveEnd)) { 5375 FoundEnd = true; 5376 break; 5377 } 5378 5379 CollectStream << Parser.parseStringToEndOfStatement() 5380 << getContext().getAsmInfo()->getSeparatorString(); 5381 5382 Parser.eatToEndOfStatement(); 5383 } 5384 5385 getLexer().setSkipSpace(true); 5386 5387 if (isToken(AsmToken::Eof) && !FoundEnd) { 5388 return TokError(Twine("expected directive ") + 5389 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5390 } 5391 5392 CollectStream.flush(); 5393 return false; 5394 } 5395 5396 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5397 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5398 std::string String; 5399 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5400 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5401 return true; 5402 5403 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5404 if (!PALMetadata->setFromString(String)) 5405 return Error(getLoc(), "invalid PAL metadata"); 5406 return false; 5407 } 5408 5409 /// Parse the assembler directive for old linear-format PAL metadata. 5410 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5411 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5412 return Error(getLoc(), 5413 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5414 "not available on non-amdpal OSes")).str()); 5415 } 5416 5417 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5418 PALMetadata->setLegacy(); 5419 for (;;) { 5420 uint32_t Key, Value; 5421 if (ParseAsAbsoluteExpression(Key)) { 5422 return TokError(Twine("invalid value in ") + 5423 Twine(PALMD::AssemblerDirective)); 5424 } 5425 if (!trySkipToken(AsmToken::Comma)) { 5426 return TokError(Twine("expected an even number of values in ") + 5427 Twine(PALMD::AssemblerDirective)); 5428 } 5429 if (ParseAsAbsoluteExpression(Value)) { 5430 return TokError(Twine("invalid value in ") + 5431 Twine(PALMD::AssemblerDirective)); 5432 } 5433 PALMetadata->setRegister(Key, Value); 5434 if (!trySkipToken(AsmToken::Comma)) 5435 break; 5436 } 5437 return false; 5438 } 5439 5440 /// ParseDirectiveAMDGPULDS 5441 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5442 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5443 if (getParser().checkForValidSection()) 5444 return true; 5445 5446 StringRef Name; 5447 SMLoc NameLoc = getLoc(); 5448 if (getParser().parseIdentifier(Name)) 5449 return TokError("expected identifier in directive"); 5450 5451 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5452 if (parseToken(AsmToken::Comma, "expected ','")) 5453 return true; 5454 5455 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5456 5457 int64_t Size; 5458 SMLoc SizeLoc = getLoc(); 5459 if (getParser().parseAbsoluteExpression(Size)) 5460 return true; 5461 if (Size < 0) 5462 return Error(SizeLoc, "size must be non-negative"); 5463 if (Size > LocalMemorySize) 5464 return Error(SizeLoc, "size is too large"); 5465 5466 int64_t Alignment = 4; 5467 if (trySkipToken(AsmToken::Comma)) { 5468 SMLoc AlignLoc = getLoc(); 5469 if 
(getParser().parseAbsoluteExpression(Alignment)) 5470 return true; 5471 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5472 return Error(AlignLoc, "alignment must be a power of two"); 5473 5474 // Alignment larger than the size of LDS is possible in theory, as long 5475 // as the linker manages to place the symbol at address 0, but we do want 5476 // to make sure the alignment fits nicely into a 32-bit integer. 5477 if (Alignment >= 1u << 31) 5478 return Error(AlignLoc, "alignment is too large"); 5479 } 5480 5481 if (parseToken(AsmToken::EndOfStatement, 5482 "unexpected token in '.amdgpu_lds' directive")) 5483 return true; 5484 5485 Symbol->redefineIfPossible(); 5486 if (!Symbol->isUndefined()) 5487 return Error(NameLoc, "invalid symbol redefinition"); 5488 5489 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5490 return false; 5491 } 5492 5493 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5494 StringRef IDVal = DirectiveID.getString(); 5495 5496 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5497 if (IDVal == ".amdhsa_kernel") 5498 return ParseDirectiveAMDHSAKernel(); 5499 5500 // TODO: Restructure/combine with PAL metadata directive. 5501 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5502 return ParseDirectiveHSAMetadata(); 5503 } else { 5504 if (IDVal == ".hsa_code_object_version") 5505 return ParseDirectiveHSACodeObjectVersion(); 5506 5507 if (IDVal == ".hsa_code_object_isa") 5508 return ParseDirectiveHSACodeObjectISA(); 5509 5510 if (IDVal == ".amd_kernel_code_t") 5511 return ParseDirectiveAMDKernelCodeT(); 5512 5513 if (IDVal == ".amdgpu_hsa_kernel") 5514 return ParseDirectiveAMDGPUHsaKernel(); 5515 5516 if (IDVal == ".amd_amdgpu_isa") 5517 return ParseDirectiveISAVersion(); 5518 5519 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5520 return ParseDirectiveHSAMetadata(); 5521 } 5522 5523 if (IDVal == ".amdgcn_target") 5524 return ParseDirectiveAMDGCNTarget(); 5525 5526 if (IDVal == ".amdgpu_lds") 5527 return ParseDirectiveAMDGPULDS(); 5528 5529 if (IDVal == PALMD::AssemblerDirectiveBegin) 5530 return ParseDirectivePALMetadataBegin(); 5531 5532 if (IDVal == PALMD::AssemblerDirective) 5533 return ParseDirectivePALMetadata(); 5534 5535 return true; 5536 } 5537 5538 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5539 unsigned RegNo) { 5540 5541 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5542 return isGFX9Plus(); 5543 5544 // GFX10 has 2 more SGPRs 104 and 105. 5545 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5546 return hasSGPR104_SGPR105(); 5547 5548 switch (RegNo) { 5549 case AMDGPU::SRC_SHARED_BASE: 5550 case AMDGPU::SRC_SHARED_LIMIT: 5551 case AMDGPU::SRC_PRIVATE_BASE: 5552 case AMDGPU::SRC_PRIVATE_LIMIT: 5553 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5554 return isGFX9Plus(); 5555 case AMDGPU::TBA: 5556 case AMDGPU::TBA_LO: 5557 case AMDGPU::TBA_HI: 5558 case AMDGPU::TMA: 5559 case AMDGPU::TMA_LO: 5560 case AMDGPU::TMA_HI: 5561 return !isGFX9Plus(); 5562 case AMDGPU::XNACK_MASK: 5563 case AMDGPU::XNACK_MASK_LO: 5564 case AMDGPU::XNACK_MASK_HI: 5565 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5566 case AMDGPU::SGPR_NULL: 5567 return isGFX10Plus(); 5568 default: 5569 break; 5570 } 5571 5572 if (isCI()) 5573 return true; 5574 5575 if (isSI() || isGFX10Plus()) { 5576 // No flat_scr on SI. 5577 // On GFX10 flat scratch is not a valid register operand and can only be 5578 // accessed with s_setreg/s_getreg.
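    // (Illustrative GFX10 example, assuming the HW_REG_FLAT_SCR_LO name:
    //   s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO).)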
5579 switch (RegNo) { 5580 case AMDGPU::FLAT_SCR: 5581 case AMDGPU::FLAT_SCR_LO: 5582 case AMDGPU::FLAT_SCR_HI: 5583 return false; 5584 default: 5585 return true; 5586 } 5587 } 5588 5589 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5590 // SI/CI have. 5591 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5592 return hasSGPR102_SGPR103(); 5593 5594 return true; 5595 } 5596 5597 OperandMatchResultTy 5598 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5599 OperandMode Mode) { 5600 // Try to parse with a custom parser 5601 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5602 5603 // If we successfully parsed the operand or if there was an error parsing, 5604 // we are done. 5605 // 5606 // If we are parsing after we reach EndOfStatement then this means we 5607 // are appending default values to the Operands list. This is only done 5608 // by a custom parser, so we shouldn't continue on to the generic parsing. 5609 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5610 isToken(AsmToken::EndOfStatement)) 5611 return ResTy; 5612 5613 SMLoc RBraceLoc; 5614 SMLoc LBraceLoc = getLoc(); 5615 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5616 unsigned Prefix = Operands.size(); 5617 5618 for (;;) { 5619 auto Loc = getLoc(); 5620 ResTy = parseReg(Operands); 5621 if (ResTy == MatchOperand_NoMatch) 5622 Error(Loc, "expected a register"); 5623 if (ResTy != MatchOperand_Success) 5624 return MatchOperand_ParseFail; 5625 5626 RBraceLoc = getLoc(); 5627 if (trySkipToken(AsmToken::RBrac)) 5628 break; 5629 5630 if (!skipToken(AsmToken::Comma, 5631 "expected a comma or a closing square bracket")) { 5632 return MatchOperand_ParseFail; 5633 } 5634 } 5635 5636 if (Operands.size() - Prefix > 1) { 5637 Operands.insert(Operands.begin() + Prefix, 5638 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5639 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5640 } 5641 5642 return MatchOperand_Success; 5643 } 5644 5645 return parseRegOrImm(Operands); 5646 } 5647 5648 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5649 // Clear any forced encodings from the previous instruction. 5650 setForcedEncodingSize(0); 5651 setForcedDPP(false); 5652 setForcedSDWA(false); 5653 5654 if (Name.endswith("_e64")) { 5655 setForcedEncodingSize(64); 5656 return Name.substr(0, Name.size() - 4); 5657 } else if (Name.endswith("_e32")) { 5658 setForcedEncodingSize(32); 5659 return Name.substr(0, Name.size() - 4); 5660 } else if (Name.endswith("_dpp")) { 5661 setForcedDPP(true); 5662 return Name.substr(0, Name.size() - 4); 5663 } else if (Name.endswith("_sdwa")) { 5664 setForcedSDWA(true); 5665 return Name.substr(0, Name.size() - 5); 5666 } 5667 return Name; 5668 } 5669 5670 static void applyMnemonicAliases(StringRef &Mnemonic, 5671 const FeatureBitset &Features, 5672 unsigned VariantID); 5673 5674 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5675 StringRef Name, 5676 SMLoc NameLoc, OperandVector &Operands) { 5677 // Add the instruction mnemonic 5678 Name = parseMnemonicSuffix(Name); 5679 5680 // If the target architecture uses MnemonicAlias, call it here to parse 5681 // operands correctly.
5682 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5683 5684 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5685 5686 bool IsMIMG = Name.startswith("image_"); 5687 5688 while (!trySkipToken(AsmToken::EndOfStatement)) { 5689 OperandMode Mode = OperandMode_Default; 5690 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5691 Mode = OperandMode_NSA; 5692 CPolSeen = 0; 5693 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5694 5695 if (Res != MatchOperand_Success) { 5696 checkUnsupportedInstruction(Name, NameLoc); 5697 if (!Parser.hasPendingError()) { 5698 // FIXME: use real operand location rather than the current location. 5699 StringRef Msg = 5700 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5701 "not a valid operand."; 5702 Error(getLoc(), Msg); 5703 } 5704 while (!trySkipToken(AsmToken::EndOfStatement)) { 5705 lex(); 5706 } 5707 return true; 5708 } 5709 5710 // Eat the comma or space if there is one. 5711 trySkipToken(AsmToken::Comma); 5712 } 5713 5714 return false; 5715 } 5716 5717 //===----------------------------------------------------------------------===// 5718 // Utility functions 5719 //===----------------------------------------------------------------------===// 5720 5721 OperandMatchResultTy 5722 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5723 5724 if (!trySkipId(Prefix, AsmToken::Colon)) 5725 return MatchOperand_NoMatch; 5726 5727 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5728 } 5729 5730 OperandMatchResultTy 5731 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5732 AMDGPUOperand::ImmTy ImmTy, 5733 bool (*ConvertResult)(int64_t&)) { 5734 SMLoc S = getLoc(); 5735 int64_t Value = 0; 5736 5737 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5738 if (Res != MatchOperand_Success) 5739 return Res; 5740 5741 if (ConvertResult && !ConvertResult(Value)) { 5742 Error(S, "invalid " + StringRef(Prefix) + " value."); 5743 } 5744 5745 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5746 return MatchOperand_Success; 5747 } 5748 5749 OperandMatchResultTy 5750 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5751 OperandVector &Operands, 5752 AMDGPUOperand::ImmTy ImmTy, 5753 bool (*ConvertResult)(int64_t&)) { 5754 SMLoc S = getLoc(); 5755 if (!trySkipId(Prefix, AsmToken::Colon)) 5756 return MatchOperand_NoMatch; 5757 5758 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5759 return MatchOperand_ParseFail; 5760 5761 unsigned Val = 0; 5762 const unsigned MaxSize = 4; 5763 5764 // FIXME: How to verify the number of elements matches the number of src 5765 // operands? 
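  // Accepts a short bracketed list of 0/1 values, e.g. "op_sel:[0,1,0,0]"
  // (illustrative), which is folded into a single bitmask immediate below.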
5766 for (int I = 0; ; ++I) { 5767 int64_t Op; 5768 SMLoc Loc = getLoc(); 5769 if (!parseExpr(Op)) 5770 return MatchOperand_ParseFail; 5771 5772 if (Op != 0 && Op != 1) { 5773 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5774 return MatchOperand_ParseFail; 5775 } 5776 5777 Val |= (Op << I); 5778 5779 if (trySkipToken(AsmToken::RBrac)) 5780 break; 5781 5782 if (I + 1 == MaxSize) { 5783 Error(getLoc(), "expected a closing square bracket"); 5784 return MatchOperand_ParseFail; 5785 } 5786 5787 if (!skipToken(AsmToken::Comma, "expected a comma")) 5788 return MatchOperand_ParseFail; 5789 } 5790 5791 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5792 return MatchOperand_Success; 5793 } 5794 5795 OperandMatchResultTy 5796 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5797 AMDGPUOperand::ImmTy ImmTy) { 5798 int64_t Bit; 5799 SMLoc S = getLoc(); 5800 5801 if (trySkipId(Name)) { 5802 Bit = 1; 5803 } else if (trySkipId("no", Name)) { 5804 Bit = 0; 5805 } else { 5806 return MatchOperand_NoMatch; 5807 } 5808 5809 if (Name == "r128" && !hasMIMG_R128()) { 5810 Error(S, "r128 modifier is not supported on this GPU"); 5811 return MatchOperand_ParseFail; 5812 } 5813 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5814 Error(S, "a16 modifier is not supported on this GPU"); 5815 return MatchOperand_ParseFail; 5816 } 5817 5818 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5819 ImmTy = AMDGPUOperand::ImmTyR128A16; 5820 5821 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5822 return MatchOperand_Success; 5823 } 5824 5825 OperandMatchResultTy 5826 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5827 unsigned CPolOn = 0; 5828 unsigned CPolOff = 0; 5829 SMLoc S = getLoc(); 5830 5831 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5832 if (isGFX940() && !Mnemo.startswith("s_")) { 5833 if (trySkipId("sc0")) 5834 CPolOn = AMDGPU::CPol::SC0; 5835 else if (trySkipId("nosc0")) 5836 CPolOff = AMDGPU::CPol::SC0; 5837 else if (trySkipId("nt")) 5838 CPolOn = AMDGPU::CPol::NT; 5839 else if (trySkipId("nont")) 5840 CPolOff = AMDGPU::CPol::NT; 5841 else if (trySkipId("sc1")) 5842 CPolOn = AMDGPU::CPol::SC1; 5843 else if (trySkipId("nosc1")) 5844 CPolOff = AMDGPU::CPol::SC1; 5845 else 5846 return MatchOperand_NoMatch; 5847 } 5848 else if (trySkipId("glc")) 5849 CPolOn = AMDGPU::CPol::GLC; 5850 else if (trySkipId("noglc")) 5851 CPolOff = AMDGPU::CPol::GLC; 5852 else if (trySkipId("slc")) 5853 CPolOn = AMDGPU::CPol::SLC; 5854 else if (trySkipId("noslc")) 5855 CPolOff = AMDGPU::CPol::SLC; 5856 else if (trySkipId("dlc")) 5857 CPolOn = AMDGPU::CPol::DLC; 5858 else if (trySkipId("nodlc")) 5859 CPolOff = AMDGPU::CPol::DLC; 5860 else if (trySkipId("scc")) 5861 CPolOn = AMDGPU::CPol::SCC; 5862 else if (trySkipId("noscc")) 5863 CPolOff = AMDGPU::CPol::SCC; 5864 else 5865 return MatchOperand_NoMatch; 5866 5867 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5868 Error(S, "dlc modifier is not supported on this GPU"); 5869 return MatchOperand_ParseFail; 5870 } 5871 5872 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5873 Error(S, "scc modifier is not supported on this GPU"); 5874 return MatchOperand_ParseFail; 5875 } 5876 5877 if (CPolSeen & (CPolOn | CPolOff)) { 5878 Error(S, "duplicate cache policy modifier"); 5879 return MatchOperand_ParseFail; 5880 } 5881 5882 CPolSeen |= (CPolOn | CPolOff); 5883 5884 for (unsigned I = 1; I != Operands.size(); ++I) { 5885 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5886 if (Op.isCPol()) { 5887 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5888 return MatchOperand_Success; 5889 } 5890 } 5891 5892 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5893 AMDGPUOperand::ImmTyCPol)); 5894 5895 return MatchOperand_Success; 5896 } 5897 5898 static void addOptionalImmOperand( 5899 MCInst& Inst, const OperandVector& Operands, 5900 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5901 AMDGPUOperand::ImmTy ImmT, 5902 int64_t Default = 0) { 5903 auto i = OptionalIdx.find(ImmT); 5904 if (i != OptionalIdx.end()) { 5905 unsigned Idx = i->second; 5906 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5907 } else { 5908 Inst.addOperand(MCOperand::createImm(Default)); 5909 } 5910 } 5911 5912 OperandMatchResultTy 5913 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5914 StringRef &Value, 5915 SMLoc &StringLoc) { 5916 if (!trySkipId(Prefix, AsmToken::Colon)) 5917 return MatchOperand_NoMatch; 5918 5919 StringLoc = getLoc(); 5920 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5921 : MatchOperand_ParseFail; 5922 } 5923 5924 //===----------------------------------------------------------------------===// 5925 // MTBUF format 5926 //===----------------------------------------------------------------------===// 5927 5928 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5929 int64_t MaxVal, 5930 int64_t &Fmt) { 5931 int64_t Val; 5932 SMLoc Loc = getLoc(); 5933 5934 auto Res = parseIntWithPrefix(Pref, Val); 5935 if (Res == MatchOperand_ParseFail) 5936 return false; 5937 if (Res == MatchOperand_NoMatch) 5938 return true; 5939 5940 if (Val < 0 || Val > MaxVal) { 5941 Error(Loc, Twine("out of range ", StringRef(Pref))); 5942 return false; 5943 } 5944 5945 Fmt = Val; 5946 return true; 5947 } 5948 5949 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5950 // values to live in a joint format operand in the MCInst encoding. 5951 OperandMatchResultTy 5952 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5953 using namespace llvm::AMDGPU::MTBUFFormat; 5954 5955 int64_t Dfmt = DFMT_UNDEF; 5956 int64_t Nfmt = NFMT_UNDEF; 5957 5958 // dfmt and nfmt can appear in either order, and each is optional. 5959 for (int I = 0; I < 2; ++I) { 5960 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5961 return MatchOperand_ParseFail; 5962 5963 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5964 return MatchOperand_ParseFail; 5965 } 5966 // Skip optional comma between dfmt/nfmt 5967 // but guard against 2 commas following each other. 5968 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5969 !peekToken().is(AsmToken::Comma)) { 5970 trySkipToken(AsmToken::Comma); 5971 } 5972 } 5973 5974 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5975 return MatchOperand_NoMatch; 5976 5977 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5978 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5979 5980 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5981 return MatchOperand_Success; 5982 } 5983 5984 OperandMatchResultTy 5985 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5986 using namespace llvm::AMDGPU::MTBUFFormat; 5987 5988 int64_t Fmt = UFMT_UNDEF; 5989 5990 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5991 return MatchOperand_ParseFail; 5992 5993 if (Fmt == UFMT_UNDEF) 5994 return MatchOperand_NoMatch; 5995 5996 Format = Fmt; 5997 return MatchOperand_Success; 5998 } 5999 6000 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 6001 int64_t &Nfmt, 6002 StringRef FormatStr, 6003 SMLoc Loc) { 6004 using namespace llvm::AMDGPU::MTBUFFormat; 6005 int64_t Format; 6006 6007 Format = getDfmt(FormatStr); 6008 if (Format != DFMT_UNDEF) { 6009 Dfmt = Format; 6010 return true; 6011 } 6012 6013 Format = getNfmt(FormatStr, getSTI()); 6014 if (Format != NFMT_UNDEF) { 6015 Nfmt = Format; 6016 return true; 6017 } 6018 6019 Error(Loc, "unsupported format"); 6020 return false; 6021 } 6022 6023 OperandMatchResultTy 6024 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6025 SMLoc FormatLoc, 6026 int64_t &Format) { 6027 using namespace llvm::AMDGPU::MTBUFFormat; 6028 6029 int64_t Dfmt = DFMT_UNDEF; 6030 int64_t Nfmt = NFMT_UNDEF; 6031 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6032 return MatchOperand_ParseFail; 6033 6034 if (trySkipToken(AsmToken::Comma)) { 6035 StringRef Str; 6036 SMLoc Loc = getLoc(); 6037 if (!parseId(Str, "expected a format string") || 6038 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6039 return MatchOperand_ParseFail; 6040 } 6041 if (Dfmt == DFMT_UNDEF) { 6042 Error(Loc, "duplicate numeric format"); 6043 return MatchOperand_ParseFail; 6044 } else if (Nfmt == NFMT_UNDEF) { 6045 Error(Loc, "duplicate data format"); 6046 return MatchOperand_ParseFail; 6047 } 6048 } 6049 6050 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6051 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6052 6053 if (isGFX10Plus()) { 6054 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6055 if (Ufmt == UFMT_UNDEF) { 6056 Error(FormatLoc, "unsupported format"); 6057 return MatchOperand_ParseFail; 6058 } 6059 Format = Ufmt; 6060 } else { 6061 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6062 } 6063 6064 return MatchOperand_Success; 6065 } 6066 6067 OperandMatchResultTy 6068 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6069 SMLoc Loc, 6070 int64_t &Format) { 6071 using namespace llvm::AMDGPU::MTBUFFormat; 6072 6073 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6074 if (Id == UFMT_UNDEF) 6075 return MatchOperand_NoMatch; 6076 6077 if (!isGFX10Plus()) { 6078 Error(Loc, "unified format is not supported on this GPU"); 6079 return MatchOperand_ParseFail; 6080 } 6081 6082 Format = Id; 6083 return MatchOperand_Success; 6084 } 6085 6086 OperandMatchResultTy 6087 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6088 using namespace llvm::AMDGPU::MTBUFFormat; 6089 SMLoc Loc = getLoc(); 6090 6091 if (!parseExpr(Format)) 6092 return MatchOperand_ParseFail; 6093 if (!isValidFormatEncoding(Format, getSTI())) { 6094 Error(Loc, "out of range format"); 6095 return MatchOperand_ParseFail; 6096 } 6097 6098 return MatchOperand_Success; 6099 } 6100 6101 OperandMatchResultTy 6102 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6103 using namespace llvm::AMDGPU::MTBUFFormat; 6104 6105 if (!trySkipId("format", AsmToken::Colon)) 6106 return MatchOperand_NoMatch; 6107 6108 if (trySkipToken(AsmToken::LBrac)) { 6109 StringRef FormatStr; 6110 SMLoc Loc = getLoc(); 6111 if (!parseId(FormatStr, "expected a format string")) 6112 return MatchOperand_ParseFail; 6113 6114 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6115 if (Res == MatchOperand_NoMatch) 6116 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6117 if (Res != MatchOperand_Success) 6118 return Res; 6119 6120 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6121 return MatchOperand_ParseFail; 6122 6123 return MatchOperand_Success; 6124 } 6125 6126 return parseNumericFormat(Format); 6127 } 6128 6129 OperandMatchResultTy 6130 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6131 using namespace llvm::AMDGPU::MTBUFFormat; 6132 6133 int64_t Format = getDefaultFormatEncoding(getSTI()); 6134 OperandMatchResultTy Res; 6135 SMLoc Loc = getLoc(); 6136 6137 // Parse legacy format syntax. 6138 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6139 if (Res == MatchOperand_ParseFail) 6140 return Res; 6141 6142 bool FormatFound = (Res == MatchOperand_Success); 6143 6144 Operands.push_back( 6145 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6146 6147 if (FormatFound) 6148 trySkipToken(AsmToken::Comma); 6149 6150 if (isToken(AsmToken::EndOfStatement)) { 6151 // We are expecting an soffset operand, 6152 // but let matcher handle the error. 6153 return MatchOperand_Success; 6154 } 6155 6156 // Parse soffset. 
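  // E.g. the "s3" in "tbuffer_load_format_x v4, off, s[8:11], s3 format:[BUF_FMT_32_FLOAT]"
  // (illustrative operand order and format name).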
6157 Res = parseRegOrImm(Operands); 6158 if (Res != MatchOperand_Success) 6159 return Res; 6160 6161 trySkipToken(AsmToken::Comma); 6162 6163 if (!FormatFound) { 6164 Res = parseSymbolicOrNumericFormat(Format); 6165 if (Res == MatchOperand_ParseFail) 6166 return Res; 6167 if (Res == MatchOperand_Success) { 6168 auto Size = Operands.size(); 6169 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6170 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6171 Op.setImm(Format); 6172 } 6173 return MatchOperand_Success; 6174 } 6175 6176 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6177 Error(getLoc(), "duplicate format"); 6178 return MatchOperand_ParseFail; 6179 } 6180 return MatchOperand_Success; 6181 } 6182 6183 //===----------------------------------------------------------------------===// 6184 // ds 6185 //===----------------------------------------------------------------------===// 6186 6187 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6188 const OperandVector &Operands) { 6189 OptionalImmIndexMap OptionalIdx; 6190 6191 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6192 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6193 6194 // Add the register arguments 6195 if (Op.isReg()) { 6196 Op.addRegOperands(Inst, 1); 6197 continue; 6198 } 6199 6200 // Handle optional arguments 6201 OptionalIdx[Op.getImmTy()] = i; 6202 } 6203 6204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6206 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6207 6208 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6209 } 6210 6211 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6212 bool IsGdsHardcoded) { 6213 OptionalImmIndexMap OptionalIdx; 6214 6215 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6216 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6217 6218 // Add the register arguments 6219 if (Op.isReg()) { 6220 Op.addRegOperands(Inst, 1); 6221 continue; 6222 } 6223 6224 if (Op.isToken() && Op.getToken() == "gds") { 6225 IsGdsHardcoded = true; 6226 continue; 6227 } 6228 6229 // Handle optional arguments 6230 OptionalIdx[Op.getImmTy()] = i; 6231 } 6232 6233 AMDGPUOperand::ImmTy OffsetType = 6234 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6235 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6236 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6237 AMDGPUOperand::ImmTyOffset; 6238 6239 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6240 6241 if (!IsGdsHardcoded) { 6242 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6243 } 6244 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6245 } 6246 6247 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6248 OptionalImmIndexMap OptionalIdx; 6249 6250 unsigned OperandIdx[4]; 6251 unsigned EnMask = 0; 6252 int SrcIdx = 0; 6253 6254 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6255 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6256 6257 // Add the register arguments 6258 if (Op.isReg()) { 6259 assert(SrcIdx < 4); 6260 OperandIdx[SrcIdx] = Inst.size(); 6261 Op.addRegOperands(Inst, 1); 6262 ++SrcIdx; 6263 continue; 6264 } 6265 6266 if (Op.isOff()) { 6267 assert(SrcIdx < 4); 6268 OperandIdx[SrcIdx] = Inst.size(); 6269 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6270 ++SrcIdx; 6271 continue; 6272 } 6273 6274 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6275 Op.addImmOperands(Inst, 1); 6276 continue; 6277 } 6278 6279 if (Op.isToken() && Op.getToken() == "done") 6280 continue; 6281 6282 // Handle optional arguments 6283 OptionalIdx[Op.getImmTy()] = i; 6284 } 6285 6286 assert(SrcIdx == 4); 6287 6288 bool Compr = false; 6289 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6290 Compr = true; 6291 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6292 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6293 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6294 } 6295 6296 for (auto i = 0; i < SrcIdx; ++i) { 6297 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6298 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6299 } 6300 } 6301 6302 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6303 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6304 6305 Inst.addOperand(MCOperand::createImm(EnMask)); 6306 } 6307 6308 //===----------------------------------------------------------------------===// 6309 // s_waitcnt 6310 //===----------------------------------------------------------------------===// 6311 6312 static bool 6313 encodeCnt( 6314 const AMDGPU::IsaVersion ISA, 6315 int64_t &IntVal, 6316 int64_t CntVal, 6317 bool Saturate, 6318 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6319 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6320 { 6321 bool Failed = false; 6322 6323 IntVal = encode(ISA, IntVal, CntVal); 6324 if (CntVal != decode(ISA, IntVal)) { 6325 if (Saturate) { 6326 IntVal = encode(ISA, IntVal, -1); 6327 } else { 6328 Failed = true; 6329 } 6330 } 6331 return Failed; 6332 } 6333 6334 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6335 6336 SMLoc CntLoc = getLoc(); 6337 StringRef CntName = getTokenStr(); 6338 6339 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6340 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6341 return false; 6342 6343 int64_t CntVal; 6344 SMLoc ValLoc = getLoc(); 6345 if (!parseExpr(CntVal)) 6346 return false; 6347 6348 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6349 6350 bool Failed = true; 6351 bool Sat = CntName.endswith("_sat"); 6352 6353 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6354 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6355 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6356 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6357 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6358 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6359 } else { 6360 Error(CntLoc, "invalid counter name " + CntName); 6361 return false; 6362 } 6363 6364 if (Failed) { 6365 Error(ValLoc, "too large value for " + CntName); 6366 return false; 6367 } 6368 6369 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6370 return false; 6371 6372 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6373 if (isToken(AsmToken::EndOfStatement)) { 6374 Error(getLoc(), "expected a counter name"); 6375 return false; 6376 } 6377 } 6378 6379 return true; 6380 } 6381 6382 OperandMatchResultTy 6383 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6384 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6385 int64_t Waitcnt = getWaitcntBitMask(ISA); 6386 SMLoc S = getLoc(); 6387 6388 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6389 while (!isToken(AsmToken::EndOfStatement)) { 6390 if (!parseCnt(Waitcnt)) 6391 return MatchOperand_ParseFail; 6392 } 6393 } else { 6394 if (!parseExpr(Waitcnt)) 6395 return MatchOperand_ParseFail; 6396 } 6397 6398 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6399 return MatchOperand_Success; 6400 } 6401 6402 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6403 SMLoc FieldLoc = getLoc(); 6404 StringRef FieldName = getTokenStr(); 6405 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6406 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6407 return false; 6408 6409 SMLoc ValueLoc = getLoc(); 6410 StringRef ValueName = getTokenStr(); 6411 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6412 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6413 return false; 6414 6415 unsigned Shift; 6416 if (FieldName == "instid0") { 6417 Shift = 0; 6418 } else if (FieldName == "instskip") { 6419 Shift = 4; 6420 } else if (FieldName == "instid1") { 6421 Shift = 7; 6422 } else { 6423 Error(FieldLoc, "invalid field name " + FieldName); 6424 return false; 6425 } 6426 6427 int Value; 6428 if (Shift == 4) { 6429 // Parse values for instskip. 6430 Value = StringSwitch<int>(ValueName) 6431 .Case("SAME", 0) 6432 .Case("NEXT", 1) 6433 .Case("SKIP_1", 2) 6434 .Case("SKIP_2", 3) 6435 .Case("SKIP_3", 4) 6436 .Case("SKIP_4", 5) 6437 .Default(-1); 6438 } else { 6439 // Parse values for instid0 and instid1. 6440 Value = StringSwitch<int>(ValueName) 6441 .Case("NO_DEP", 0) 6442 .Case("VALU_DEP_1", 1) 6443 .Case("VALU_DEP_2", 2) 6444 .Case("VALU_DEP_3", 3) 6445 .Case("VALU_DEP_4", 4) 6446 .Case("TRANS32_DEP_1", 5) 6447 .Case("TRANS32_DEP_2", 6) 6448 .Case("TRANS32_DEP_3", 7) 6449 .Case("FMA_ACCUM_CYCLE_1", 8) 6450 .Case("SALU_CYCLE_1", 9) 6451 .Case("SALU_CYCLE_2", 10) 6452 .Case("SALU_CYCLE_3", 11) 6453 .Default(-1); 6454 } 6455 if (Value < 0) { 6456 Error(ValueLoc, "invalid value name " + ValueName); 6457 return false; 6458 } 6459 6460 Delay |= Value << Shift; 6461 return true; 6462 } 6463 6464 OperandMatchResultTy 6465 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6466 int64_t Delay = 0; 6467 SMLoc S = getLoc(); 6468 6469 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6470 do { 6471 if (!parseDelay(Delay)) 6472 return MatchOperand_ParseFail; 6473 } while (trySkipToken(AsmToken::Pipe)); 6474 } else { 6475 if (!parseExpr(Delay)) 6476 return MatchOperand_ParseFail; 6477 } 6478 6479 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6480 return MatchOperand_Success; 6481 } 6482 6483 bool 6484 AMDGPUOperand::isSWaitCnt() const { 6485 return isImm(); 6486 } 6487 6488 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6489 6490 //===----------------------------------------------------------------------===// 6491 // DepCtr 6492 //===----------------------------------------------------------------------===// 6493 6494 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6495 StringRef DepCtrName) { 6496 switch (ErrorId) { 6497 case OPR_ID_UNKNOWN: 6498 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6499 return; 6500 case OPR_ID_UNSUPPORTED: 6501 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6502 return; 6503 case OPR_ID_DUPLICATE: 6504 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6505 return; 6506 case OPR_VAL_INVALID: 6507 Error(Loc, Twine("invalid value for ", DepCtrName)); 6508 return; 6509 default: 6510 assert(false); 6511 } 6512 } 6513 6514 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6515 6516 using namespace llvm::AMDGPU::DepCtr; 6517 6518 SMLoc DepCtrLoc = getLoc(); 6519 StringRef DepCtrName = getTokenStr(); 6520 6521 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6522 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6523 return false; 6524 6525 int64_t ExprVal; 6526 if (!parseExpr(ExprVal)) 6527 return false; 6528 6529 unsigned PrevOprMask = UsedOprMask; 6530 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6531 6532 if (CntVal < 0) { 6533 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6534 return false; 6535 } 6536 6537 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6538 return false; 6539 6540 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6541 if (isToken(AsmToken::EndOfStatement)) { 6542 Error(getLoc(), "expected a counter name"); 6543 return false; 6544 } 6545 } 6546 6547 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6548 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6549 return true; 6550 } 6551 6552 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6553 using namespace llvm::AMDGPU::DepCtr; 6554 6555 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6556 SMLoc Loc = getLoc(); 6557 6558 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6559 unsigned UsedOprMask = 0; 6560 while (!isToken(AsmToken::EndOfStatement)) { 6561 if (!parseDepCtr(DepCtr, UsedOprMask)) 6562 return MatchOperand_ParseFail; 6563 } 6564 } else { 6565 if (!parseExpr(DepCtr)) 6566 return MatchOperand_ParseFail; 6567 } 6568 6569 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6570 return MatchOperand_Success; 6571 } 6572 6573 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6574 6575 //===----------------------------------------------------------------------===// 6576 // hwreg 6577 //===----------------------------------------------------------------------===// 6578 6579 bool 6580 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6581 OperandInfoTy &Offset, 6582 OperandInfoTy &Width) { 6583 using namespace llvm::AMDGPU::Hwreg; 6584 6585 // The register may be specified by name or using a numeric code 6586 HwReg.Loc = getLoc(); 6587 if (isToken(AsmToken::Identifier) && 6588 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6589 HwReg.IsSymbolic = true; 6590 lex(); // skip register name 6591 } else if (!parseExpr(HwReg.Id, "a register name")) { 6592 return false; 6593 } 6594 6595 if (trySkipToken(AsmToken::RParen)) 6596 return true; 6597 6598 // parse optional params 6599 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6600 return false; 6601 6602 Offset.Loc = getLoc(); 6603 if (!parseExpr(Offset.Id)) 6604 return false; 6605 6606 if (!skipToken(AsmToken::Comma, "expected a comma")) 6607 return false; 6608 6609 Width.Loc = getLoc(); 6610 return parseExpr(Width.Id) && 6611 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6612 } 6613 6614 bool 6615 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6616 const OperandInfoTy &Offset, 6617 const OperandInfoTy &Width) { 6618 6619 using namespace llvm::AMDGPU::Hwreg; 6620 6621 if (HwReg.IsSymbolic) { 6622 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6623 Error(HwReg.Loc, 6624 "specified hardware register is not supported on this GPU"); 6625 return false; 6626 } 6627 } else { 6628 if (!isValidHwreg(HwReg.Id)) { 6629 Error(HwReg.Loc, 6630 "invalid code of hardware register: only 6-bit values are legal"); 6631 return false; 6632 } 6633 } 6634 if (!isValidHwregOffset(Offset.Id)) { 6635 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6636 return false; 6637 } 6638 if (!isValidHwregWidth(Width.Id)) { 6639 Error(Width.Loc, 6640 "invalid bitfield width: only values from 1 to 32 are legal"); 6641 return false; 6642 } 6643 return true; 6644 } 6645 6646 OperandMatchResultTy 6647 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6648 using namespace llvm::AMDGPU::Hwreg; 6649 6650 int64_t ImmVal = 0; 6651 SMLoc Loc = getLoc(); 6652 6653 if (trySkipId("hwreg", AsmToken::LParen)) { 6654 
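    // Symbolic form, e.g. "hwreg(HW_REG_TRAPSTS)" or "hwreg(HW_REG_TRAPSTS, 0, 32)"
    // (illustrative register name); offset and width default when omitted.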
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6655 OperandInfoTy Offset(OFFSET_DEFAULT_); 6656 OperandInfoTy Width(WIDTH_DEFAULT_); 6657 if (parseHwregBody(HwReg, Offset, Width) && 6658 validateHwreg(HwReg, Offset, Width)) { 6659 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6660 } else { 6661 return MatchOperand_ParseFail; 6662 } 6663 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6664 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6665 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6666 return MatchOperand_ParseFail; 6667 } 6668 } else { 6669 return MatchOperand_ParseFail; 6670 } 6671 6672 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6673 return MatchOperand_Success; 6674 } 6675 6676 bool AMDGPUOperand::isHwreg() const { 6677 return isImmTy(ImmTyHwreg); 6678 } 6679 6680 //===----------------------------------------------------------------------===// 6681 // sendmsg 6682 //===----------------------------------------------------------------------===// 6683 6684 bool 6685 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6686 OperandInfoTy &Op, 6687 OperandInfoTy &Stream) { 6688 using namespace llvm::AMDGPU::SendMsg; 6689 6690 Msg.Loc = getLoc(); 6691 if (isToken(AsmToken::Identifier) && 6692 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6693 Msg.IsSymbolic = true; 6694 lex(); // skip message name 6695 } else if (!parseExpr(Msg.Id, "a message name")) { 6696 return false; 6697 } 6698 6699 if (trySkipToken(AsmToken::Comma)) { 6700 Op.IsDefined = true; 6701 Op.Loc = getLoc(); 6702 if (isToken(AsmToken::Identifier) && 6703 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6704 lex(); // skip operation name 6705 } else if (!parseExpr(Op.Id, "an operation name")) { 6706 return false; 6707 } 6708 6709 if (trySkipToken(AsmToken::Comma)) { 6710 Stream.IsDefined = true; 6711 Stream.Loc = getLoc(); 6712 if (!parseExpr(Stream.Id)) 6713 return false; 6714 } 6715 } 6716 6717 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6718 } 6719 6720 bool 6721 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6722 const OperandInfoTy &Op, 6723 const OperandInfoTy &Stream) { 6724 using namespace llvm::AMDGPU::SendMsg; 6725 6726 // Validation strictness depends on whether message is specified 6727 // in a symbolic or in a numeric form. In the latter case 6728 // only encoding possibility is checked. 
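  // E.g. "sendmsg(MSG_INTERRUPT)" is checked against the known message table,
  // while a raw numeric form such as "sendmsg(1)" only needs to be encodable
  // (illustrative examples).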
6729 bool Strict = Msg.IsSymbolic; 6730 6731 if (Strict) { 6732 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6733 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6734 return false; 6735 } 6736 } else { 6737 if (!isValidMsgId(Msg.Id, getSTI())) { 6738 Error(Msg.Loc, "invalid message id"); 6739 return false; 6740 } 6741 } 6742 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6743 if (Op.IsDefined) { 6744 Error(Op.Loc, "message does not support operations"); 6745 } else { 6746 Error(Msg.Loc, "missing message operation"); 6747 } 6748 return false; 6749 } 6750 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6751 Error(Op.Loc, "invalid operation id"); 6752 return false; 6753 } 6754 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6755 Stream.IsDefined) { 6756 Error(Stream.Loc, "message operation does not support streams"); 6757 return false; 6758 } 6759 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6760 Error(Stream.Loc, "invalid message stream id"); 6761 return false; 6762 } 6763 return true; 6764 } 6765 6766 OperandMatchResultTy 6767 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6768 using namespace llvm::AMDGPU::SendMsg; 6769 6770 int64_t ImmVal = 0; 6771 SMLoc Loc = getLoc(); 6772 6773 if (trySkipId("sendmsg", AsmToken::LParen)) { 6774 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6775 OperandInfoTy Op(OP_NONE_); 6776 OperandInfoTy Stream(STREAM_ID_NONE_); 6777 if (parseSendMsgBody(Msg, Op, Stream) && 6778 validateSendMsg(Msg, Op, Stream)) { 6779 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6780 } else { 6781 return MatchOperand_ParseFail; 6782 } 6783 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6784 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6785 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6786 return MatchOperand_ParseFail; 6787 } 6788 } else { 6789 return MatchOperand_ParseFail; 6790 } 6791 6792 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6793 return MatchOperand_Success; 6794 } 6795 6796 bool AMDGPUOperand::isSendMsg() const { 6797 return isImmTy(ImmTySendMsg); 6798 } 6799 6800 //===----------------------------------------------------------------------===// 6801 // v_interp 6802 //===----------------------------------------------------------------------===// 6803 6804 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6805 StringRef Str; 6806 SMLoc S = getLoc(); 6807 6808 if (!parseId(Str)) 6809 return MatchOperand_NoMatch; 6810 6811 int Slot = StringSwitch<int>(Str) 6812 .Case("p10", 0) 6813 .Case("p20", 1) 6814 .Case("p0", 2) 6815 .Default(-1); 6816 6817 if (Slot == -1) { 6818 Error(S, "invalid interpolation slot"); 6819 return MatchOperand_ParseFail; 6820 } 6821 6822 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6823 AMDGPUOperand::ImmTyInterpSlot)); 6824 return MatchOperand_Success; 6825 } 6826 6827 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6828 StringRef Str; 6829 SMLoc S = getLoc(); 6830 6831 if (!parseId(Str)) 6832 return MatchOperand_NoMatch; 6833 6834 if (!Str.startswith("attr")) { 6835 Error(S, "invalid interpolation attribute"); 6836 return MatchOperand_ParseFail; 6837 } 6838 6839 StringRef Chan = Str.take_back(2); 6840 int AttrChan = StringSwitch<int>(Chan) 6841 .Case(".x", 0) 6842 .Case(".y", 1) 6843 .Case(".z", 2) 6844 .Case(".w", 3) 6845 .Default(-1); 6846 if (AttrChan == -1) { 6847 Error(S, "invalid or missing interpolation attribute channel"); 
6848 return MatchOperand_ParseFail; 6849 } 6850 6851 Str = Str.drop_back(2).drop_front(4); 6852 6853 uint8_t Attr; 6854 if (Str.getAsInteger(10, Attr)) { 6855 Error(S, "invalid or missing interpolation attribute number"); 6856 return MatchOperand_ParseFail; 6857 } 6858 6859 if (Attr > 63) { 6860 Error(S, "out of bounds interpolation attribute number"); 6861 return MatchOperand_ParseFail; 6862 } 6863 6864 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6865 6866 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6867 AMDGPUOperand::ImmTyInterpAttr)); 6868 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6869 AMDGPUOperand::ImmTyAttrChan)); 6870 return MatchOperand_Success; 6871 } 6872 6873 //===----------------------------------------------------------------------===// 6874 // exp 6875 //===----------------------------------------------------------------------===// 6876 6877 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6878 using namespace llvm::AMDGPU::Exp; 6879 6880 StringRef Str; 6881 SMLoc S = getLoc(); 6882 6883 if (!parseId(Str)) 6884 return MatchOperand_NoMatch; 6885 6886 unsigned Id = getTgtId(Str); 6887 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6888 Error(S, (Id == ET_INVALID) ? 6889 "invalid exp target" : 6890 "exp target is not supported on this GPU"); 6891 return MatchOperand_ParseFail; 6892 } 6893 6894 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6895 AMDGPUOperand::ImmTyExpTgt)); 6896 return MatchOperand_Success; 6897 } 6898 6899 //===----------------------------------------------------------------------===// 6900 // parser helpers 6901 //===----------------------------------------------------------------------===// 6902 6903 bool 6904 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6905 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6906 } 6907 6908 bool 6909 AMDGPUAsmParser::isId(const StringRef Id) const { 6910 return isId(getToken(), Id); 6911 } 6912 6913 bool 6914 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6915 return getTokenKind() == Kind; 6916 } 6917 6918 bool 6919 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6920 if (isId(Id)) { 6921 lex(); 6922 return true; 6923 } 6924 return false; 6925 } 6926 6927 bool 6928 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6929 if (isToken(AsmToken::Identifier)) { 6930 StringRef Tok = getTokenStr(); 6931 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6932 lex(); 6933 return true; 6934 } 6935 } 6936 return false; 6937 } 6938 6939 bool 6940 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6941 if (isId(Id) && peekToken().is(Kind)) { 6942 lex(); 6943 lex(); 6944 return true; 6945 } 6946 return false; 6947 } 6948 6949 bool 6950 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6951 if (isToken(Kind)) { 6952 lex(); 6953 return true; 6954 } 6955 return false; 6956 } 6957 6958 bool 6959 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6960 const StringRef ErrMsg) { 6961 if (!trySkipToken(Kind)) { 6962 Error(getLoc(), ErrMsg); 6963 return false; 6964 } 6965 return true; 6966 } 6967 6968 bool 6969 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6970 SMLoc S = getLoc(); 6971 6972 const MCExpr *Expr; 6973 if (Parser.parseExpression(Expr)) 6974 return false; 6975 6976 if (Expr->evaluateAsAbsolute(Imm)) 6977 return true; 6978 6979 if (Expected.empty()) { 6980 Error(S, "expected 
absolute expression"); 6981 } else { 6982 Error(S, Twine("expected ", Expected) + 6983 Twine(" or an absolute expression")); 6984 } 6985 return false; 6986 } 6987 6988 bool 6989 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6990 SMLoc S = getLoc(); 6991 6992 const MCExpr *Expr; 6993 if (Parser.parseExpression(Expr)) 6994 return false; 6995 6996 int64_t IntVal; 6997 if (Expr->evaluateAsAbsolute(IntVal)) { 6998 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6999 } else { 7000 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 7001 } 7002 return true; 7003 } 7004 7005 bool 7006 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7007 if (isToken(AsmToken::String)) { 7008 Val = getToken().getStringContents(); 7009 lex(); 7010 return true; 7011 } else { 7012 Error(getLoc(), ErrMsg); 7013 return false; 7014 } 7015 } 7016 7017 bool 7018 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7019 if (isToken(AsmToken::Identifier)) { 7020 Val = getTokenStr(); 7021 lex(); 7022 return true; 7023 } else { 7024 if (!ErrMsg.empty()) 7025 Error(getLoc(), ErrMsg); 7026 return false; 7027 } 7028 } 7029 7030 AsmToken 7031 AMDGPUAsmParser::getToken() const { 7032 return Parser.getTok(); 7033 } 7034 7035 AsmToken 7036 AMDGPUAsmParser::peekToken() { 7037 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7038 } 7039 7040 void 7041 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7042 auto TokCount = getLexer().peekTokens(Tokens); 7043 7044 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7045 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7046 } 7047 7048 AsmToken::TokenKind 7049 AMDGPUAsmParser::getTokenKind() const { 7050 return getLexer().getKind(); 7051 } 7052 7053 SMLoc 7054 AMDGPUAsmParser::getLoc() const { 7055 return getToken().getLoc(); 7056 } 7057 7058 StringRef 7059 AMDGPUAsmParser::getTokenStr() const { 7060 return getToken().getString(); 7061 } 7062 7063 void 7064 AMDGPUAsmParser::lex() { 7065 Parser.Lex(); 7066 } 7067 7068 SMLoc 7069 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7070 const OperandVector &Operands) const { 7071 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7072 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7073 if (Test(Op)) 7074 return Op.getStartLoc(); 7075 } 7076 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7077 } 7078 7079 SMLoc 7080 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7081 const OperandVector &Operands) const { 7082 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7083 return getOperandLoc(Test, Operands); 7084 } 7085 7086 SMLoc 7087 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7088 const OperandVector &Operands) const { 7089 auto Test = [=](const AMDGPUOperand& Op) { 7090 return Op.isRegKind() && Op.getReg() == Reg; 7091 }; 7092 return getOperandLoc(Test, Operands); 7093 } 7094 7095 SMLoc 7096 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7097 auto Test = [](const AMDGPUOperand& Op) { 7098 return Op.IsImmKindLiteral() || Op.isExpr(); 7099 }; 7100 return getOperandLoc(Test, Operands); 7101 } 7102 7103 SMLoc 7104 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7105 auto Test = [](const AMDGPUOperand& Op) { 7106 return Op.isImmKindConst(); 7107 }; 7108 return getOperandLoc(Test, Operands); 7109 } 7110 7111 //===----------------------------------------------------------------------===// 7112 // swizzle 7113 
//===----------------------------------------------------------------------===// 7114 7115 LLVM_READNONE 7116 static unsigned 7117 encodeBitmaskPerm(const unsigned AndMask, 7118 const unsigned OrMask, 7119 const unsigned XorMask) { 7120 using namespace llvm::AMDGPU::Swizzle; 7121 7122 return BITMASK_PERM_ENC | 7123 (AndMask << BITMASK_AND_SHIFT) | 7124 (OrMask << BITMASK_OR_SHIFT) | 7125 (XorMask << BITMASK_XOR_SHIFT); 7126 } 7127 7128 bool 7129 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7130 const unsigned MinVal, 7131 const unsigned MaxVal, 7132 const StringRef ErrMsg, 7133 SMLoc &Loc) { 7134 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7135 return false; 7136 } 7137 Loc = getLoc(); 7138 if (!parseExpr(Op)) { 7139 return false; 7140 } 7141 if (Op < MinVal || Op > MaxVal) { 7142 Error(Loc, ErrMsg); 7143 return false; 7144 } 7145 7146 return true; 7147 } 7148 7149 bool 7150 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7151 const unsigned MinVal, 7152 const unsigned MaxVal, 7153 const StringRef ErrMsg) { 7154 SMLoc Loc; 7155 for (unsigned i = 0; i < OpNum; ++i) { 7156 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7157 return false; 7158 } 7159 7160 return true; 7161 } 7162 7163 bool 7164 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7165 using namespace llvm::AMDGPU::Swizzle; 7166 7167 int64_t Lane[LANE_NUM]; 7168 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7169 "expected a 2-bit lane id")) { 7170 Imm = QUAD_PERM_ENC; 7171 for (unsigned I = 0; I < LANE_NUM; ++I) { 7172 Imm |= Lane[I] << (LANE_SHIFT * I); 7173 } 7174 return true; 7175 } 7176 return false; 7177 } 7178 7179 bool 7180 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7181 using namespace llvm::AMDGPU::Swizzle; 7182 7183 SMLoc Loc; 7184 int64_t GroupSize; 7185 int64_t LaneIdx; 7186 7187 if (!parseSwizzleOperand(GroupSize, 7188 2, 32, 7189 "group size must be in the interval [2,32]", 7190 Loc)) { 7191 return false; 7192 } 7193 if (!isPowerOf2_64(GroupSize)) { 7194 Error(Loc, "group size must be a power of two"); 7195 return false; 7196 } 7197 if (parseSwizzleOperand(LaneIdx, 7198 0, GroupSize - 1, 7199 "lane id must be in the interval [0,group size - 1]", 7200 Loc)) { 7201 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7202 return true; 7203 } 7204 return false; 7205 } 7206 7207 bool 7208 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7209 using namespace llvm::AMDGPU::Swizzle; 7210 7211 SMLoc Loc; 7212 int64_t GroupSize; 7213 7214 if (!parseSwizzleOperand(GroupSize, 7215 2, 32, 7216 "group size must be in the interval [2,32]", 7217 Loc)) { 7218 return false; 7219 } 7220 if (!isPowerOf2_64(GroupSize)) { 7221 Error(Loc, "group size must be a power of two"); 7222 return false; 7223 } 7224 7225 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7226 return true; 7227 } 7228 7229 bool 7230 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7231 using namespace llvm::AMDGPU::Swizzle; 7232 7233 SMLoc Loc; 7234 int64_t GroupSize; 7235 7236 if (!parseSwizzleOperand(GroupSize, 7237 1, 16, 7238 "group size must be in the interval [1,16]", 7239 Loc)) { 7240 return false; 7241 } 7242 if (!isPowerOf2_64(GroupSize)) { 7243 Error(Loc, "group size must be a power of two"); 7244 return false; 7245 } 7246 7247 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7248 return true; 7249 } 7250 7251 bool 7252 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7253 using namespace llvm::AMDGPU::Swizzle; 7254 7255 if 
          (!skipToken(AsmToken::Comma, "expected a comma")) {
7256     return false;
7257   }
7258 
7259   StringRef Ctl;
7260   SMLoc StrLoc = getLoc();
7261   if (!parseString(Ctl)) {
7262     return false;
7263   }
7264   if (Ctl.size() != BITMASK_WIDTH) {
7265     Error(StrLoc, "expected a 5-character mask");
7266     return false;
7267   }
7268 
7269   unsigned AndMask = 0;
7270   unsigned OrMask = 0;
7271   unsigned XorMask = 0;
7272 
7273   for (size_t i = 0; i < Ctl.size(); ++i) {
7274     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
7275     switch (Ctl[i]) {
7276     default:
7277       Error(StrLoc, "invalid mask");
7278       return false;
7279     case '0':
7280       break;
7281     case '1':
7282       OrMask |= Mask;
7283       break;
7284     case 'p':
7285       AndMask |= Mask;
7286       break;
7287     case 'i':
7288       AndMask |= Mask;
7289       XorMask |= Mask;
7290       break;
7291     }
7292   }
7293 
7294   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
7295   return true;
7296 }
7297 
7298 bool
7299 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
7300 
7301   SMLoc OffsetLoc = getLoc();
7302 
7303   if (!parseExpr(Imm, "a swizzle macro")) {
7304     return false;
7305   }
7306   if (!isUInt<16>(Imm)) {
7307     Error(OffsetLoc, "expected a 16-bit offset");
7308     return false;
7309   }
7310   return true;
7311 }
7312 
7313 bool
7314 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
7315   using namespace llvm::AMDGPU::Swizzle;
7316 
7317   if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {
7318 
7319     SMLoc ModeLoc = getLoc();
7320     bool Ok = false;
7321 
7322     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
7323       Ok = parseSwizzleQuadPerm(Imm);
7324     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
7325       Ok = parseSwizzleBitmaskPerm(Imm);
7326     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
7327       Ok = parseSwizzleBroadcast(Imm);
7328     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
7329       Ok = parseSwizzleSwap(Imm);
7330     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
7331       Ok = parseSwizzleReverse(Imm);
7332     } else {
7333       Error(ModeLoc, "expected a swizzle mode");
7334     }
7335 
7336     return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
7337   }
7338 
7339   return false;
7340 }
7341 
7342 OperandMatchResultTy
7343 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
7344   SMLoc S = getLoc();
7345   int64_t Imm = 0;
7346 
7347   if (trySkipId("offset")) {
7348 
7349     bool Ok = false;
7350     if (skipToken(AsmToken::Colon, "expected a colon")) {
7351       if (trySkipId("swizzle")) {
7352         Ok = parseSwizzleMacro(Imm);
7353       } else {
7354         Ok = parseSwizzleOffset(Imm);
7355       }
7356     }
7357 
7358     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
7359 
7360     return Ok ? MatchOperand_Success : MatchOperand_ParseFail;
7361   } else {
7362     // Swizzle "offset" operand is optional.
7363     // If it is omitted, try parsing other optional operands.
7364 return parseOptionalOpr(Operands); 7365 } 7366 } 7367 7368 bool 7369 AMDGPUOperand::isSwizzle() const { 7370 return isImmTy(ImmTySwizzle); 7371 } 7372 7373 //===----------------------------------------------------------------------===// 7374 // VGPR Index Mode 7375 //===----------------------------------------------------------------------===// 7376 7377 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7378 7379 using namespace llvm::AMDGPU::VGPRIndexMode; 7380 7381 if (trySkipToken(AsmToken::RParen)) { 7382 return OFF; 7383 } 7384 7385 int64_t Imm = 0; 7386 7387 while (true) { 7388 unsigned Mode = 0; 7389 SMLoc S = getLoc(); 7390 7391 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7392 if (trySkipId(IdSymbolic[ModeId])) { 7393 Mode = 1 << ModeId; 7394 break; 7395 } 7396 } 7397 7398 if (Mode == 0) { 7399 Error(S, (Imm == 0)? 7400 "expected a VGPR index mode or a closing parenthesis" : 7401 "expected a VGPR index mode"); 7402 return UNDEF; 7403 } 7404 7405 if (Imm & Mode) { 7406 Error(S, "duplicate VGPR index mode"); 7407 return UNDEF; 7408 } 7409 Imm |= Mode; 7410 7411 if (trySkipToken(AsmToken::RParen)) 7412 break; 7413 if (!skipToken(AsmToken::Comma, 7414 "expected a comma or a closing parenthesis")) 7415 return UNDEF; 7416 } 7417 7418 return Imm; 7419 } 7420 7421 OperandMatchResultTy 7422 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7423 7424 using namespace llvm::AMDGPU::VGPRIndexMode; 7425 7426 int64_t Imm = 0; 7427 SMLoc S = getLoc(); 7428 7429 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7430 Imm = parseGPRIdxMacro(); 7431 if (Imm == UNDEF) 7432 return MatchOperand_ParseFail; 7433 } else { 7434 if (getParser().parseAbsoluteExpression(Imm)) 7435 return MatchOperand_ParseFail; 7436 if (Imm < 0 || !isUInt<4>(Imm)) { 7437 Error(S, "invalid immediate: only 4-bit values are legal"); 7438 return MatchOperand_ParseFail; 7439 } 7440 } 7441 7442 Operands.push_back( 7443 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7444 return MatchOperand_Success; 7445 } 7446 7447 bool AMDGPUOperand::isGPRIdxMode() const { 7448 return isImmTy(ImmTyGprIdxMode); 7449 } 7450 7451 //===----------------------------------------------------------------------===// 7452 // sopp branch targets 7453 //===----------------------------------------------------------------------===// 7454 7455 OperandMatchResultTy 7456 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7457 7458 // Make sure we are not parsing something 7459 // that looks like a label or an expression but is not. 7460 // This will improve error messages. 7461 if (isRegister() || isModifier()) 7462 return MatchOperand_NoMatch; 7463 7464 if (!parseExpr(Operands)) 7465 return MatchOperand_ParseFail; 7466 7467 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7468 assert(Opr.isImm() || Opr.isExpr()); 7469 SMLoc Loc = Opr.getStartLoc(); 7470 7471 // Currently we do not support arbitrary expressions as branch targets. 7472 // Only labels and absolute expressions are accepted. 
7473 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7474 Error(Loc, "expected an absolute expression or a label"); 7475 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7476 Error(Loc, "expected a 16-bit signed jump offset"); 7477 } 7478 7479 return MatchOperand_Success; 7480 } 7481 7482 //===----------------------------------------------------------------------===// 7483 // Boolean holding registers 7484 //===----------------------------------------------------------------------===// 7485 7486 OperandMatchResultTy 7487 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7488 return parseReg(Operands); 7489 } 7490 7491 //===----------------------------------------------------------------------===// 7492 // mubuf 7493 //===----------------------------------------------------------------------===// 7494 7495 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7496 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7497 } 7498 7499 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7500 const OperandVector &Operands, 7501 bool IsAtomic, 7502 bool IsLds) { 7503 OptionalImmIndexMap OptionalIdx; 7504 unsigned FirstOperandIdx = 1; 7505 bool IsAtomicReturn = false; 7506 7507 if (IsAtomic) { 7508 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7509 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7510 if (!Op.isCPol()) 7511 continue; 7512 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7513 break; 7514 } 7515 7516 if (!IsAtomicReturn) { 7517 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7518 if (NewOpc != -1) 7519 Inst.setOpcode(NewOpc); 7520 } 7521 7522 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7523 SIInstrFlags::IsAtomicRet; 7524 } 7525 7526 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7527 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7528 7529 // Add the register arguments 7530 if (Op.isReg()) { 7531 Op.addRegOperands(Inst, 1); 7532 // Insert a tied src for atomic return dst. 7533 // This cannot be postponed as subsequent calls to 7534 // addImmOperands rely on correct number of MC operands. 7535 if (IsAtomicReturn && i == FirstOperandIdx) 7536 Op.addRegOperands(Inst, 1); 7537 continue; 7538 } 7539 7540 // Handle the case where soffset is an immediate 7541 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7542 Op.addImmOperands(Inst, 1); 7543 continue; 7544 } 7545 7546 // Handle tokens like 'offen' which are sometimes hard-coded into the 7547 // asm string. There are no MCInst operands for these. 
7548 if (Op.isToken()) { 7549 continue; 7550 } 7551 assert(Op.isImm()); 7552 7553 // Handle optional arguments 7554 OptionalIdx[Op.getImmTy()] = i; 7555 } 7556 7557 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7558 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7559 7560 if (!IsLds) { // tfe is not legal with lds opcodes 7561 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7562 } 7563 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7564 } 7565 7566 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7567 OptionalImmIndexMap OptionalIdx; 7568 7569 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7570 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7571 7572 // Add the register arguments 7573 if (Op.isReg()) { 7574 Op.addRegOperands(Inst, 1); 7575 continue; 7576 } 7577 7578 // Handle the case where soffset is an immediate 7579 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7580 Op.addImmOperands(Inst, 1); 7581 continue; 7582 } 7583 7584 // Handle tokens like 'offen' which are sometimes hard-coded into the 7585 // asm string. There are no MCInst operands for these. 7586 if (Op.isToken()) { 7587 continue; 7588 } 7589 assert(Op.isImm()); 7590 7591 // Handle optional arguments 7592 OptionalIdx[Op.getImmTy()] = i; 7593 } 7594 7595 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7596 AMDGPUOperand::ImmTyOffset); 7597 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7598 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7599 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7601 } 7602 7603 //===----------------------------------------------------------------------===// 7604 // mimg 7605 //===----------------------------------------------------------------------===// 7606 7607 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7608 bool IsAtomic) { 7609 unsigned I = 1; 7610 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7611 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7612 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7613 } 7614 7615 if (IsAtomic) { 7616 // Add src, same as dst 7617 assert(Desc.getNumDefs() == 1); 7618 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7619 } 7620 7621 OptionalImmIndexMap OptionalIdx; 7622 7623 for (unsigned E = Operands.size(); I != E; ++I) { 7624 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7625 7626 // Add the register arguments 7627 if (Op.isReg()) { 7628 Op.addRegOperands(Inst, 1); 7629 } else if (Op.isImmModifier()) { 7630 OptionalIdx[Op.getImmTy()] = I; 7631 } else if (!Op.isToken()) { 7632 llvm_unreachable("unexpected operand type"); 7633 } 7634 } 7635 7636 bool IsGFX10Plus = isGFX10Plus(); 7637 7638 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7639 if (IsGFX10Plus) 7640 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7642 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7643 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7644 if (IsGFX10Plus) 7645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7646 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7647 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7648 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7649 if (!IsGFX10Plus) 7650 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7651 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7652 } 7653 7654 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7655 cvtMIMG(Inst, Operands, true); 7656 } 7657 7658 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7659 OptionalImmIndexMap OptionalIdx; 7660 bool IsAtomicReturn = false; 7661 7662 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7663 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7664 if (!Op.isCPol()) 7665 continue; 7666 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7667 break; 7668 } 7669 7670 if (!IsAtomicReturn) { 7671 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7672 if (NewOpc != -1) 7673 Inst.setOpcode(NewOpc); 7674 } 7675 7676 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7677 SIInstrFlags::IsAtomicRet; 7678 7679 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7680 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7681 7682 // Add the register arguments 7683 if (Op.isReg()) { 7684 Op.addRegOperands(Inst, 1); 7685 if (IsAtomicReturn && i == 1) 7686 Op.addRegOperands(Inst, 1); 7687 continue; 7688 } 7689 7690 // Handle the case where soffset is an immediate 7691 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7692 Op.addImmOperands(Inst, 1); 7693 continue; 7694 } 7695 7696 // Handle tokens like 'offen' which are sometimes hard-coded into the 7697 // asm string. There are no MCInst operands for these. 7698 if (Op.isToken()) { 7699 continue; 7700 } 7701 assert(Op.isImm()); 7702 7703 // Handle optional arguments 7704 OptionalIdx[Op.getImmTy()] = i; 7705 } 7706 7707 if ((int)Inst.getNumOperands() <= 7708 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7710 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7711 } 7712 7713 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7714 const OperandVector &Operands) { 7715 for (unsigned I = 1; I < Operands.size(); ++I) { 7716 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7717 if (Operand.isReg()) 7718 Operand.addRegOperands(Inst, 1); 7719 } 7720 7721 Inst.addOperand(MCOperand::createImm(1)); // a16 7722 } 7723 7724 //===----------------------------------------------------------------------===// 7725 // smrd 7726 //===----------------------------------------------------------------------===// 7727 7728 bool AMDGPUOperand::isSMRDOffset8() const { 7729 return isImm() && isUInt<8>(getImm()); 7730 } 7731 7732 bool AMDGPUOperand::isSMEMOffset() const { 7733 return isImm(); // Offset range is checked later by validator. 7734 } 7735 7736 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7737 // 32-bit literals are only supported on CI and we only want to use them 7738 // when the offset is > 8-bits. 
7739 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7740 } 7741 7742 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7743 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7744 } 7745 7746 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7747 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7748 } 7749 7750 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7751 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7752 } 7753 7754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7755 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7756 } 7757 7758 //===----------------------------------------------------------------------===// 7759 // vop3 7760 //===----------------------------------------------------------------------===// 7761 7762 static bool ConvertOmodMul(int64_t &Mul) { 7763 if (Mul != 1 && Mul != 2 && Mul != 4) 7764 return false; 7765 7766 Mul >>= 1; 7767 return true; 7768 } 7769 7770 static bool ConvertOmodDiv(int64_t &Div) { 7771 if (Div == 1) { 7772 Div = 0; 7773 return true; 7774 } 7775 7776 if (Div == 2) { 7777 Div = 3; 7778 return true; 7779 } 7780 7781 return false; 7782 } 7783 7784 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7785 // This is intentional and ensures compatibility with sp3. 7786 // See bug 35397 for details. 7787 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7788 if (BoundCtrl == 0 || BoundCtrl == 1) { 7789 BoundCtrl = 1; 7790 return true; 7791 } 7792 return false; 7793 } 7794 7795 // Note: the order in this table matches the order of operands in AsmString. 7796 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7797 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7798 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7799 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7800 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7801 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7802 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7803 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7804 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7805 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7806 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7807 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7808 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7809 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7810 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7811 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7812 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7813 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7814 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7815 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7816 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7817 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7818 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7819 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7820 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7821 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7822 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7823 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7824 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7825 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7826 {"src0_sel", 
                                AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7827   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7828   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7829   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
7830   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7831   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7832   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7833   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7834   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7835   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7836   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7837   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7838 };
7839 
7840 void AMDGPUAsmParser::onBeginOfFile() {
7841   if (!getParser().getStreamer().getTargetStreamer() ||
7842       getSTI().getTargetTriple().getArch() == Triple::r600)
7843     return;
7844 
7845   if (!getTargetStreamer().getTargetID())
7846     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7847 
7848   if (isHsaAbiVersion3AndAbove(&getSTI()))
7849     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7850 }
7851 
7852 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7853 
7854   OperandMatchResultTy res = parseOptionalOpr(Operands);
7855 
7856   // This is a hack to enable hardcoded mandatory operands which follow
7857   // optional operands.
7858   //
7859   // The current design assumes that all operands after the first optional operand
7860   // are also optional. However, the implementation of some instructions violates
7861   // this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7862   //
7863   // To alleviate this problem, we have to (implicitly) parse extra operands
7864   // to make sure the autogenerated parser of custom operands never hits these
7865   // hardcoded mandatory operands.
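  //
  // Specifically, the loop below keeps consuming optional operands (at most
  // MAX_OPR_LOOKAHEAD of them), skipping the separating commas, until a parse
  // attempt fails or the end of the statement is reached.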
7866 7867 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7868 if (res != MatchOperand_Success || 7869 isToken(AsmToken::EndOfStatement)) 7870 break; 7871 7872 trySkipToken(AsmToken::Comma); 7873 res = parseOptionalOpr(Operands); 7874 } 7875 7876 return res; 7877 } 7878 7879 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7880 OperandMatchResultTy res; 7881 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7882 // try to parse any optional operand here 7883 if (Op.IsBit) { 7884 res = parseNamedBit(Op.Name, Operands, Op.Type); 7885 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7886 res = parseOModOperand(Operands); 7887 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7888 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7889 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7890 res = parseSDWASel(Operands, Op.Name, Op.Type); 7891 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7892 res = parseSDWADstUnused(Operands); 7893 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7894 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7895 Op.Type == AMDGPUOperand::ImmTyNegLo || 7896 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7897 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7898 Op.ConvertResult); 7899 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7900 res = parseDim(Operands); 7901 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7902 res = parseCPol(Operands); 7903 } else { 7904 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7905 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7906 res = parseOperandArrayWithPrefix("neg", Operands, 7907 AMDGPUOperand::ImmTyBLGP, 7908 nullptr); 7909 } 7910 } 7911 if (res != MatchOperand_NoMatch) { 7912 return res; 7913 } 7914 } 7915 return MatchOperand_NoMatch; 7916 } 7917 7918 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7919 StringRef Name = getTokenStr(); 7920 if (Name == "mul") { 7921 return parseIntWithPrefix("mul", Operands, 7922 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7923 } 7924 7925 if (Name == "div") { 7926 return parseIntWithPrefix("div", Operands, 7927 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7928 } 7929 7930 return MatchOperand_NoMatch; 7931 } 7932 7933 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7934 cvtVOP3P(Inst, Operands); 7935 7936 int Opc = Inst.getOpcode(); 7937 7938 int SrcNum; 7939 const int Ops[] = { AMDGPU::OpName::src0, 7940 AMDGPU::OpName::src1, 7941 AMDGPU::OpName::src2 }; 7942 for (SrcNum = 0; 7943 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7944 ++SrcNum); 7945 assert(SrcNum > 0); 7946 7947 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7948 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7949 7950 if ((OpSel & (1 << SrcNum)) != 0) { 7951 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7952 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7953 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7954 } 7955 } 7956 7957 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7958 // 1. This operand is input modifiers 7959 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7960 // 2. This is not last operand 7961 && Desc.NumOperands > (OpNum + 1) 7962 // 3. Next operand is register class 7963 && Desc.OpInfo[OpNum + 1].RegClass != -1 7964 // 4. 
Next register is not tied to any other operand 7965 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7966 } 7967 7968 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7969 { 7970 OptionalImmIndexMap OptionalIdx; 7971 unsigned Opc = Inst.getOpcode(); 7972 7973 unsigned I = 1; 7974 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7975 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7976 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7977 } 7978 7979 for (unsigned E = Operands.size(); I != E; ++I) { 7980 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7981 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7982 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7983 } else if (Op.isInterpSlot() || 7984 Op.isInterpAttr() || 7985 Op.isAttrChan()) { 7986 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7987 } else if (Op.isImmModifier()) { 7988 OptionalIdx[Op.getImmTy()] = I; 7989 } else { 7990 llvm_unreachable("unhandled operand type"); 7991 } 7992 } 7993 7994 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7995 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7996 } 7997 7998 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7999 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8000 } 8001 8002 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8004 } 8005 } 8006 8007 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8008 OptionalImmIndexMap &OptionalIdx) { 8009 unsigned Opc = Inst.getOpcode(); 8010 8011 unsigned I = 1; 8012 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8013 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8014 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8015 } 8016 8017 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8018 // This instruction has src modifiers 8019 for (unsigned E = Operands.size(); I != E; ++I) { 8020 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8021 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8022 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8023 } else if (Op.isImmModifier()) { 8024 OptionalIdx[Op.getImmTy()] = I; 8025 } else if (Op.isRegOrImm()) { 8026 Op.addRegOrImmOperands(Inst, 1); 8027 } else { 8028 llvm_unreachable("unhandled operand type"); 8029 } 8030 } 8031 } else { 8032 // No src modifiers 8033 for (unsigned E = Operands.size(); I != E; ++I) { 8034 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8035 if (Op.isMod()) { 8036 OptionalIdx[Op.getImmTy()] = I; 8037 } else { 8038 Op.addRegOrImmOperands(Inst, 1); 8039 } 8040 } 8041 } 8042 8043 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8044 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8045 } 8046 8047 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8048 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8049 } 8050 8051 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8052 // it has src2 register operand that is tied to dst operand 8053 // we don't allow modifiers for this operand in assembler so src2_modifiers 8054 // should be 0. 
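  // To keep the MCInst well formed, the code below inserts an explicit zero
  // src2_modifiers operand and then inserts a copy of the dst register operand
  // into the tied src2 slot that follows it.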
8055   if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
8056       Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
8057       Opc == AMDGPU::V_MAC_F32_e64_vi ||
8058       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
8059       Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
8060       Opc == AMDGPU::V_MAC_F16_e64_vi ||
8061       Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
8062       Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
8063       Opc == AMDGPU::V_FMAC_F32_e64_vi ||
8064       Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
8065       Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
8066     auto it = Inst.begin();
8067     std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
8068     it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
8069     ++it;
8070     // Copy the operand to ensure it's not invalidated when Inst grows.
8071     Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
8072   }
8073 }
8074 
8075 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
8076   OptionalImmIndexMap OptionalIdx;
8077   cvtVOP3(Inst, Operands, OptionalIdx);
8078 }
8079 
8080 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
8081                                OptionalImmIndexMap &OptIdx) {
8082   const int Opc = Inst.getOpcode();
8083   const MCInstrDesc &Desc = MII.get(Opc);
8084 
8085   const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;
8086 
8087   if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
8088     assert(!IsPacked);
8089     Inst.addOperand(Inst.getOperand(0));
8090   }
8091 
8092   // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
8093   // instruction, and then figure out where to actually put the modifiers
8094 
8095   int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
8096   if (OpSelIdx != -1) {
8097     addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
8098   }
8099 
8100   int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
8101   if (OpSelHiIdx != -1) {
8102     int DefaultVal = IsPacked ?
-1 : 0; 8103 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8104 DefaultVal); 8105 } 8106 8107 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8108 if (NegLoIdx != -1) { 8109 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8110 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8111 } 8112 8113 const int Ops[] = { AMDGPU::OpName::src0, 8114 AMDGPU::OpName::src1, 8115 AMDGPU::OpName::src2 }; 8116 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8117 AMDGPU::OpName::src1_modifiers, 8118 AMDGPU::OpName::src2_modifiers }; 8119 8120 unsigned OpSel = 0; 8121 unsigned OpSelHi = 0; 8122 unsigned NegLo = 0; 8123 unsigned NegHi = 0; 8124 8125 if (OpSelIdx != -1) 8126 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8127 8128 if (OpSelHiIdx != -1) 8129 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8130 8131 if (NegLoIdx != -1) { 8132 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8133 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8134 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8135 } 8136 8137 for (int J = 0; J < 3; ++J) { 8138 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8139 if (OpIdx == -1) 8140 break; 8141 8142 uint32_t ModVal = 0; 8143 8144 if ((OpSel & (1 << J)) != 0) 8145 ModVal |= SISrcMods::OP_SEL_0; 8146 8147 if ((OpSelHi & (1 << J)) != 0) 8148 ModVal |= SISrcMods::OP_SEL_1; 8149 8150 if ((NegLo & (1 << J)) != 0) 8151 ModVal |= SISrcMods::NEG; 8152 8153 if ((NegHi & (1 << J)) != 0) 8154 ModVal |= SISrcMods::NEG_HI; 8155 8156 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8157 8158 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8159 } 8160 } 8161 8162 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8163 OptionalImmIndexMap OptIdx; 8164 cvtVOP3(Inst, Operands, OptIdx); 8165 cvtVOP3P(Inst, Operands, OptIdx); 8166 } 8167 8168 //===----------------------------------------------------------------------===// 8169 // dpp 8170 //===----------------------------------------------------------------------===// 8171 8172 bool AMDGPUOperand::isDPP8() const { 8173 return isImmTy(ImmTyDPP8); 8174 } 8175 8176 bool AMDGPUOperand::isDPPCtrl() const { 8177 using namespace AMDGPU::DPP; 8178 8179 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8180 if (result) { 8181 int64_t Imm = getImm(); 8182 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8183 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8184 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8185 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8186 (Imm == DppCtrl::WAVE_SHL1) || 8187 (Imm == DppCtrl::WAVE_ROL1) || 8188 (Imm == DppCtrl::WAVE_SHR1) || 8189 (Imm == DppCtrl::WAVE_ROR1) || 8190 (Imm == DppCtrl::ROW_MIRROR) || 8191 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8192 (Imm == DppCtrl::BCAST15) || 8193 (Imm == DppCtrl::BCAST31) || 8194 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8195 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8196 } 8197 return false; 8198 } 8199 8200 //===----------------------------------------------------------------------===// 8201 // mAI 8202 //===----------------------------------------------------------------------===// 8203 8204 bool AMDGPUOperand::isBLGP() const { 8205 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8206 } 8207 8208 bool 
AMDGPUOperand::isCBSZ() const { 8209 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8210 } 8211 8212 bool AMDGPUOperand::isABID() const { 8213 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8214 } 8215 8216 bool AMDGPUOperand::isS16Imm() const { 8217 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8218 } 8219 8220 bool AMDGPUOperand::isU16Imm() const { 8221 return isImm() && isUInt<16>(getImm()); 8222 } 8223 8224 //===----------------------------------------------------------------------===// 8225 // dim 8226 //===----------------------------------------------------------------------===// 8227 8228 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8229 // We want to allow "dim:1D" etc., 8230 // but the initial 1 is tokenized as an integer. 8231 std::string Token; 8232 if (isToken(AsmToken::Integer)) { 8233 SMLoc Loc = getToken().getEndLoc(); 8234 Token = std::string(getTokenStr()); 8235 lex(); 8236 if (getLoc() != Loc) 8237 return false; 8238 } 8239 8240 StringRef Suffix; 8241 if (!parseId(Suffix)) 8242 return false; 8243 Token += Suffix; 8244 8245 StringRef DimId = Token; 8246 if (DimId.startswith("SQ_RSRC_IMG_")) 8247 DimId = DimId.drop_front(12); 8248 8249 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8250 if (!DimInfo) 8251 return false; 8252 8253 Encoding = DimInfo->Encoding; 8254 return true; 8255 } 8256 8257 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8258 if (!isGFX10Plus()) 8259 return MatchOperand_NoMatch; 8260 8261 SMLoc S = getLoc(); 8262 8263 if (!trySkipId("dim", AsmToken::Colon)) 8264 return MatchOperand_NoMatch; 8265 8266 unsigned Encoding; 8267 SMLoc Loc = getLoc(); 8268 if (!parseDimId(Encoding)) { 8269 Error(Loc, "invalid dim value"); 8270 return MatchOperand_ParseFail; 8271 } 8272 8273 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8274 AMDGPUOperand::ImmTyDim)); 8275 return MatchOperand_Success; 8276 } 8277 8278 //===----------------------------------------------------------------------===// 8279 // dpp 8280 //===----------------------------------------------------------------------===// 8281 8282 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8283 SMLoc S = getLoc(); 8284 8285 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8286 return MatchOperand_NoMatch; 8287 8288 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8289 8290 int64_t Sels[8]; 8291 8292 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8293 return MatchOperand_ParseFail; 8294 8295 for (size_t i = 0; i < 8; ++i) { 8296 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8297 return MatchOperand_ParseFail; 8298 8299 SMLoc Loc = getLoc(); 8300 if (getParser().parseAbsoluteExpression(Sels[i])) 8301 return MatchOperand_ParseFail; 8302 if (0 > Sels[i] || 7 < Sels[i]) { 8303 Error(Loc, "expected a 3-bit value"); 8304 return MatchOperand_ParseFail; 8305 } 8306 } 8307 8308 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8309 return MatchOperand_ParseFail; 8310 8311 unsigned DPP8 = 0; 8312 for (size_t i = 0; i < 8; ++i) 8313 DPP8 |= (Sels[i] << (i * 3)); 8314 8315 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8316 return MatchOperand_Success; 8317 } 8318 8319 bool 8320 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8321 const OperandVector &Operands) { 8322 if (Ctrl == "row_newbcast") 8323 return isGFX90A(); 8324 8325 if (Ctrl == "row_share" || 8326 Ctrl 
== "row_xmask") 8327 return isGFX10Plus(); 8328 8329 if (Ctrl == "wave_shl" || 8330 Ctrl == "wave_shr" || 8331 Ctrl == "wave_rol" || 8332 Ctrl == "wave_ror" || 8333 Ctrl == "row_bcast") 8334 return isVI() || isGFX9(); 8335 8336 return Ctrl == "row_mirror" || 8337 Ctrl == "row_half_mirror" || 8338 Ctrl == "quad_perm" || 8339 Ctrl == "row_shl" || 8340 Ctrl == "row_shr" || 8341 Ctrl == "row_ror"; 8342 } 8343 8344 int64_t 8345 AMDGPUAsmParser::parseDPPCtrlPerm() { 8346 // quad_perm:[%d,%d,%d,%d] 8347 8348 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8349 return -1; 8350 8351 int64_t Val = 0; 8352 for (int i = 0; i < 4; ++i) { 8353 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8354 return -1; 8355 8356 int64_t Temp; 8357 SMLoc Loc = getLoc(); 8358 if (getParser().parseAbsoluteExpression(Temp)) 8359 return -1; 8360 if (Temp < 0 || Temp > 3) { 8361 Error(Loc, "expected a 2-bit value"); 8362 return -1; 8363 } 8364 8365 Val += (Temp << i * 2); 8366 } 8367 8368 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8369 return -1; 8370 8371 return Val; 8372 } 8373 8374 int64_t 8375 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8376 using namespace AMDGPU::DPP; 8377 8378 // sel:%d 8379 8380 int64_t Val; 8381 SMLoc Loc = getLoc(); 8382 8383 if (getParser().parseAbsoluteExpression(Val)) 8384 return -1; 8385 8386 struct DppCtrlCheck { 8387 int64_t Ctrl; 8388 int Lo; 8389 int Hi; 8390 }; 8391 8392 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8393 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8394 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8395 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8396 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8397 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8398 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8399 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8400 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8401 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8402 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8403 .Default({-1, 0, 0}); 8404 8405 bool Valid; 8406 if (Check.Ctrl == -1) { 8407 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8408 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8409 } else { 8410 Valid = Check.Lo <= Val && Val <= Check.Hi; 8411 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8412 } 8413 8414 if (!Valid) { 8415 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8416 return -1; 8417 } 8418 8419 return Val; 8420 } 8421 8422 OperandMatchResultTy 8423 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8424 using namespace AMDGPU::DPP; 8425 8426 if (!isToken(AsmToken::Identifier) || 8427 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8428 return MatchOperand_NoMatch; 8429 8430 SMLoc S = getLoc(); 8431 int64_t Val = -1; 8432 StringRef Ctrl; 8433 8434 parseId(Ctrl); 8435 8436 if (Ctrl == "row_mirror") { 8437 Val = DppCtrl::ROW_MIRROR; 8438 } else if (Ctrl == "row_half_mirror") { 8439 Val = DppCtrl::ROW_HALF_MIRROR; 8440 } else { 8441 if (skipToken(AsmToken::Colon, "expected a colon")) { 8442 if (Ctrl == "quad_perm") { 8443 Val = parseDPPCtrlPerm(); 8444 } else { 8445 Val = parseDPPCtrlSel(Ctrl); 8446 } 8447 } 8448 } 8449 8450 if (Val == -1) 8451 return MatchOperand_ParseFail; 8452 8453 Operands.push_back( 8454 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8455 return MatchOperand_Success; 8456 } 8457 8458 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8459 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8460 } 8461 8462 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8463 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8464 } 8465 8466 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8467 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8468 } 8469 8470 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8471 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8472 } 8473 8474 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8475 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8476 } 8477 8478 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8479 OptionalImmIndexMap OptionalIdx; 8480 8481 unsigned Opc = Inst.getOpcode(); 8482 bool HasModifiers = 8483 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8484 unsigned I = 1; 8485 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8486 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8487 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8488 } 8489 8490 int Fi = 0; 8491 for (unsigned E = Operands.size(); I != E; ++I) { 8492 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8493 MCOI::TIED_TO); 8494 if (TiedTo != -1) { 8495 assert((unsigned)TiedTo < Inst.getNumOperands()); 8496 // handle tied old or src2 for MAC instructions 8497 Inst.addOperand(Inst.getOperand(TiedTo)); 8498 } 8499 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8500 // Add the register arguments 8501 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8502 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8503 // Skip it. 
8504 continue; 8505 } 8506 8507 if (IsDPP8) { 8508 if (Op.isDPP8()) { 8509 Op.addImmOperands(Inst, 1); 8510 } else if (HasModifiers && 8511 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8512 Op.addRegWithFPInputModsOperands(Inst, 2); 8513 } else if (Op.isFI()) { 8514 Fi = Op.getImm(); 8515 } else if (Op.isReg()) { 8516 Op.addRegOperands(Inst, 1); 8517 } else { 8518 llvm_unreachable("Invalid operand type"); 8519 } 8520 } else { 8521 if (HasModifiers && 8522 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8523 Op.addRegWithFPInputModsOperands(Inst, 2); 8524 } else if (Op.isReg()) { 8525 Op.addRegOperands(Inst, 1); 8526 } else if (Op.isDPPCtrl()) { 8527 Op.addImmOperands(Inst, 1); 8528 } else if (Op.isImm()) { 8529 // Handle optional arguments 8530 OptionalIdx[Op.getImmTy()] = I; 8531 } else { 8532 llvm_unreachable("Invalid operand type"); 8533 } 8534 } 8535 } 8536 8537 if (IsDPP8) { 8538 using namespace llvm::AMDGPU::DPP; 8539 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8540 } else { 8541 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8542 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8543 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8544 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8545 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8546 } 8547 } 8548 } 8549 8550 //===----------------------------------------------------------------------===// 8551 // sdwa 8552 //===----------------------------------------------------------------------===// 8553 8554 OperandMatchResultTy 8555 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8556 AMDGPUOperand::ImmTy Type) { 8557 using namespace llvm::AMDGPU::SDWA; 8558 8559 SMLoc S = getLoc(); 8560 StringRef Value; 8561 OperandMatchResultTy res; 8562 8563 SMLoc StringLoc; 8564 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8565 if (res != MatchOperand_Success) { 8566 return res; 8567 } 8568 8569 int64_t Int; 8570 Int = StringSwitch<int64_t>(Value) 8571 .Case("BYTE_0", SdwaSel::BYTE_0) 8572 .Case("BYTE_1", SdwaSel::BYTE_1) 8573 .Case("BYTE_2", SdwaSel::BYTE_2) 8574 .Case("BYTE_3", SdwaSel::BYTE_3) 8575 .Case("WORD_0", SdwaSel::WORD_0) 8576 .Case("WORD_1", SdwaSel::WORD_1) 8577 .Case("DWORD", SdwaSel::DWORD) 8578 .Default(0xffffffff); 8579 8580 if (Int == 0xffffffff) { 8581 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8582 return MatchOperand_ParseFail; 8583 } 8584 8585 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8586 return MatchOperand_Success; 8587 } 8588 8589 OperandMatchResultTy 8590 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8591 using namespace llvm::AMDGPU::SDWA; 8592 8593 SMLoc S = getLoc(); 8594 StringRef Value; 8595 OperandMatchResultTy res; 8596 8597 SMLoc StringLoc; 8598 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8599 if (res != MatchOperand_Success) { 8600 return res; 8601 } 8602 8603 int64_t Int; 8604 Int = StringSwitch<int64_t>(Value) 8605 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8606 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8607 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8608 .Default(0xffffffff); 8609 8610 if (Int == 0xffffffff) { 8611 Error(StringLoc, "invalid dst_unused value"); 8612 return MatchOperand_ParseFail; 8613 } 8614 8615 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
                                   AMDGPUOperand::ImmTySdwaDstUnused));
8616   return MatchOperand_Success;
8617 }
8618 
8619 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
8620   cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
8621 }
8622 
8623 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
8624   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
8625 }
8626 
8627 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
8628   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
8629 }
8630 
8631 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
8632   cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
8633 }
8634 
8635 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
8636   cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
8637 }
8638 
8639 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
8640                               uint64_t BasicInstType,
8641                               bool SkipDstVcc,
8642                               bool SkipSrcVcc) {
8643   using namespace llvm::AMDGPU::SDWA;
8644 
8645   OptionalImmIndexMap OptionalIdx;
8646   bool SkipVcc = SkipDstVcc || SkipSrcVcc;
8647   bool SkippedVcc = false;
8648 
8649   unsigned I = 1;
8650   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
8651   for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
8652     ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
8653   }
8654 
8655   for (unsigned E = Operands.size(); I != E; ++I) {
8656     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
8657     if (SkipVcc && !SkippedVcc && Op.isReg() &&
8658         (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
8659       // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
8660       // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
8661       // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
8662       // Skip VCC only if we didn't skip it on previous iteration.
8663       // Note that src0 and src1 occupy 2 slots each because of modifiers.
8664 if (BasicInstType == SIInstrFlags::VOP2 && 8665 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8666 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8667 SkippedVcc = true; 8668 continue; 8669 } else if (BasicInstType == SIInstrFlags::VOPC && 8670 Inst.getNumOperands() == 0) { 8671 SkippedVcc = true; 8672 continue; 8673 } 8674 } 8675 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8676 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8677 } else if (Op.isImm()) { 8678 // Handle optional arguments 8679 OptionalIdx[Op.getImmTy()] = I; 8680 } else { 8681 llvm_unreachable("Invalid operand type"); 8682 } 8683 SkippedVcc = false; 8684 } 8685 8686 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8687 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8688 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8689 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8690 switch (BasicInstType) { 8691 case SIInstrFlags::VOP1: 8692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8693 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8695 } 8696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8697 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8699 break; 8700 8701 case SIInstrFlags::VOP2: 8702 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8703 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8704 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8705 } 8706 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8707 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8710 break; 8711 8712 case SIInstrFlags::VOPC: 8713 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8714 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8715 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8716 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8717 break; 8718 8719 default: 8720 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8721 } 8722 } 8723 8724 // special case v_mac_{f16, f32}: 8725 // it has src2 register operand that is tied to dst operand 8726 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8727 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8728 auto it = Inst.begin(); 8729 std::advance( 8730 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8731 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8732 } 8733 } 8734 8735 //===----------------------------------------------------------------------===// 8736 // mAI 8737 //===----------------------------------------------------------------------===// 8738 8739 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8740 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8741 } 8742 8743 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8744 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8745 } 8746 8747 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8748 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8749 } 8750 8751 /// Force static initialization. 8752 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8753 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8754 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8755 } 8756 8757 #define GET_REGISTER_MATCHER 8758 #define GET_MATCHER_IMPLEMENTATION 8759 #define GET_MNEMONIC_SPELL_CHECKER 8760 #define GET_MNEMONIC_CHECKER 8761 #include "AMDGPUGenAsmMatcher.inc" 8762 8763 // This function should be defined after auto-generated include so that we have 8764 // MatchClassKind enum defined 8765 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8766 unsigned Kind) { 8767 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8768 // But MatchInstructionImpl() expects to meet token and fails to validate 8769 // operand. This method checks if we are given immediate operand but expect to 8770 // get corresponding token. 8771 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8772 switch (Kind) { 8773 case MCK_addr64: 8774 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8775 case MCK_gds: 8776 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8777 case MCK_lds: 8778 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8779 case MCK_idxen: 8780 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8781 case MCK_offen: 8782 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8783 case MCK_SSrcB32: 8784 // When operands have expression values, they will return true for isToken, 8785 // because it is not possible to distinguish between a token and an 8786 // expression at parse time. MatchInstructionImpl() will always try to 8787 // match an operand as a token, when isToken returns true, and when the 8788 // name of the expression is not a valid token, the match will fail, 8789 // so we need to handle it here. 8790 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8791 case MCK_SSrcF32: 8792 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8793 case MCK_SoppBrTarget: 8794 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8795 case MCK_VReg32OrOff: 8796 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8797 case MCK_InterpSlot: 8798 return Operand.isInterpSlot() ? 
           Match_Success : Match_InvalidOperand;
8799   case MCK_Attr:
8800     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8801   case MCK_AttrChan:
8802     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8803   case MCK_ImmSMEMOffset:
8804     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8805   case MCK_SReg_64:
8806   case MCK_SReg_64_XEXEC:
8807     // Null is defined as a 32-bit register but
8808     // it should also be enabled with 64-bit operands.
8809     // The following code enables it for SReg_64 operands
8810     // used as source and destination. Remaining source
8811     // operands are handled in isInlinableImm.
8812     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8813   default:
8814     return Match_InvalidOperand;
8815   }
8816 }
8817 
8818 //===----------------------------------------------------------------------===//
8819 // endpgm
8820 //===----------------------------------------------------------------------===//
8821 
8822 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8823   SMLoc S = getLoc();
8824   int64_t Imm = 0;
8825 
8826   if (!parseExpr(Imm)) {
8827     // The operand is optional, if not present default to 0
8828     Imm = 0;
8829   }
8830 
8831   if (!isUInt<16>(Imm)) {
8832     Error(S, "expected a 16-bit value");
8833     return MatchOperand_ParseFail;
8834   }
8835 
8836   Operands.push_back(
8837       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8838   return MatchOperand_Success;
8839 }
8840 
8841 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8842 
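// A minimal sketch of the assembly syntax accepted by the operand parsers in
// this section (swizzle macros, DPP/DPP8 controls, SDWA selectors). The
// mnemonics, register choices, and swizzle mode spellings below are
// illustrative assumptions, not taken from this file; only the operand
// keywords (offset:swizzle, quad_perm, dpp8, dst_sel, ...) come from the
// parsers and tables above:
//
//   ds_swizzle_b32 v5, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v5, v1 offset:swizzle(BITMASK_PERM, "01pi0")
//   v_mov_b32_dpp  v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf
//   v_mov_b32_dpp  v0, v1 dpp8:[7,6,5,4,3,2,1,0]
//   v_add_f16_sdwa v1, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0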