//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
          SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool
  isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() ||
           isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

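  // Note on the is*Src* predicate family in this class (as can be read off the
  // bodies above and below): SCSrc/VCSrc/VISrc/AISrc accept a register of the
  // named class or an inline constant, SSrc/VSrc additionally accept literal
  // immediates or expressions, an optional _<N> infix gives the register width
  // in bits, and the B/F/V2 suffix gives the element type.
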
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return
        isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isSDelayAlu() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return
        getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods =
        Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
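    // AGPR bookkeeping only applies to subtargets that have MAI instructions;
    // usesAgprAt() is a no-op otherwise.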
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX11() const {
    return AMDGPU::isGFX11(getSTI());
  }

  bool isGFX11Plus() const {
    return AMDGPU::isGFX11Plus(getSTI());
  }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  bool parseDelay(int64_t &Delay);
  OperandMatchResultTy parseSDelayAluOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  bool validateFlatLdsDMA(const MCInst &Inst, const OperandVector &Operands,
                          const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector
                &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
1773 static const fltSemantics *getFltSemantics(unsigned Size) { 1774 switch (Size) { 1775 case 4: 1776 return &APFloat::IEEEsingle(); 1777 case 8: 1778 return &APFloat::IEEEdouble(); 1779 case 2: 1780 return &APFloat::IEEEhalf(); 1781 default: 1782 llvm_unreachable("unsupported fp type"); 1783 } 1784 } 1785 1786 static const fltSemantics *getFltSemantics(MVT VT) { 1787 return getFltSemantics(VT.getSizeInBits() / 8); 1788 } 1789 1790 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1791 switch (OperandType) { 1792 case AMDGPU::OPERAND_REG_IMM_INT32: 1793 case AMDGPU::OPERAND_REG_IMM_FP32: 1794 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1795 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1796 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1798 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1799 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1800 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1801 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1802 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1803 case AMDGPU::OPERAND_KIMM32: 1804 return &APFloat::IEEEsingle(); 1805 case AMDGPU::OPERAND_REG_IMM_INT64: 1806 case AMDGPU::OPERAND_REG_IMM_FP64: 1807 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1808 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1809 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1810 return &APFloat::IEEEdouble(); 1811 case AMDGPU::OPERAND_REG_IMM_INT16: 1812 case AMDGPU::OPERAND_REG_IMM_FP16: 1813 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1814 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1815 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1816 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1817 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1818 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1819 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1820 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1822 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1823 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1824 case AMDGPU::OPERAND_KIMM16: 1825 return &APFloat::IEEEhalf(); 1826 default: 1827 llvm_unreachable("unsupported fp type"); 1828 } 1829 } 1830 1831 //===----------------------------------------------------------------------===// 1832 // Operand 1833 //===----------------------------------------------------------------------===// 1834 1835 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1836 bool Lost; 1837 1838 // Convert literal to single precision 1839 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1840 APFloat::rmNearestTiesToEven, 1841 &Lost); 1842 // We allow precision lost but not overflow or underflow 1843 if (Status != APFloat::opOK && 1844 Lost && 1845 ((Status & APFloat::opOverflow) != 0 || 1846 (Status & APFloat::opUnderflow) != 0)) { 1847 return false; 1848 } 1849 1850 return true; 1851 } 1852 1853 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1854 return isUIntN(Size, Val) || isIntN(Size, Val); 1855 } 1856 1857 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1858 if (VT.getScalarType() == MVT::i16) { 1859 // FP immediate values are broken. 1860 return isInlinableIntLiteral(Val); 1861 } 1862 1863 // f16/v2f16 operands work correctly for all values. 1864 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1865 } 1866 1867 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1868 1869 // This is a hack to enable named inline values like 1870 // shared_base with both 32-bit and 64-bit operands. 1871 // Note that these values are defined as 1872 // 32-bit operands only. 
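// For example, this lets a named value such as src_shared_base be accepted
// both as a 32-bit source (e.g. in s_mov_b32) and as a 64-bit source
// (e.g. in s_mov_b64), even though the register itself is defined as 32-bit.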
1873 if (isInlineValue()) { 1874 return true; 1875 } 1876 1877 if (!isImmTy(ImmTyNone)) { 1878 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1879 return false; 1880 } 1881 // TODO: We should avoid using host float here. It would be better to 1882 // check the float bit values which is what a few other places do. 1883 // We've had bot failures before due to weird NaN support on mips hosts. 1884 1885 APInt Literal(64, Imm.Val); 1886 1887 if (Imm.IsFPImm) { // We got fp literal token 1888 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1889 return AMDGPU::isInlinableLiteral64(Imm.Val, 1890 AsmParser->hasInv2PiInlineImm()); 1891 } 1892 1893 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1894 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1895 return false; 1896 1897 if (type.getScalarSizeInBits() == 16) { 1898 return isInlineableLiteralOp16( 1899 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1900 type, AsmParser->hasInv2PiInlineImm()); 1901 } 1902 1903 // Check if single precision literal is inlinable 1904 return AMDGPU::isInlinableLiteral32( 1905 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1906 AsmParser->hasInv2PiInlineImm()); 1907 } 1908 1909 // We got int literal token. 1910 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1911 return AMDGPU::isInlinableLiteral64(Imm.Val, 1912 AsmParser->hasInv2PiInlineImm()); 1913 } 1914 1915 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1916 return false; 1917 } 1918 1919 if (type.getScalarSizeInBits() == 16) { 1920 return isInlineableLiteralOp16( 1921 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1922 type, AsmParser->hasInv2PiInlineImm()); 1923 } 1924 1925 return AMDGPU::isInlinableLiteral32( 1926 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1927 AsmParser->hasInv2PiInlineImm()); 1928 } 1929 1930 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1931 // Check that this immediate can be added as literal 1932 if (!isImmTy(ImmTyNone)) { 1933 return false; 1934 } 1935 1936 if (!Imm.IsFPImm) { 1937 // We got int literal token. 1938 1939 if (type == MVT::f64 && hasFPModifiers()) { 1940 // Cannot apply fp modifiers to int literals preserving the same semantics 1941 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1942 // disable these cases. 1943 return false; 1944 } 1945 1946 unsigned Size = type.getSizeInBits(); 1947 if (Size == 64) 1948 Size = 32; 1949 1950 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1951 // types. 1952 return isSafeTruncation(Imm.Val, Size); 1953 } 1954 1955 // We got fp literal token 1956 if (type == MVT::f64) { // Expected 64-bit fp operand 1957 // We would set low 64-bits of literal to zeroes but we accept this literals 1958 return true; 1959 } 1960 1961 if (type == MVT::i64) { // Expected 64-bit int operand 1962 // We don't allow fp literals in 64-bit integer instructions. It is 1963 // unclear how we should encode them. 1964 return false; 1965 } 1966 1967 // We allow fp literals with f16x2 operands assuming that the specified 1968 // literal goes into the lower half and the upper half is zero. We also 1969 // require that the literal may be losslessly converted to f16. 1970 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1971 (type == MVT::v2i16)? MVT::i16 : 1972 (type == MVT::v2f32)? 
MVT::f32 : type; 1973 1974 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1975 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1976 } 1977 1978 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1979 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1980 } 1981 1982 bool AMDGPUOperand::isVRegWithInputMods() const { 1983 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1984 // GFX90A allows DPP on 64-bit operands. 1985 (isRegClass(AMDGPU::VReg_64RegClassID) && 1986 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1987 } 1988 1989 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1990 if (AsmParser->isVI()) 1991 return isVReg32(); 1992 else if (AsmParser->isGFX9Plus()) 1993 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1994 else 1995 return false; 1996 } 1997 1998 bool AMDGPUOperand::isSDWAFP16Operand() const { 1999 return isSDWAOperand(MVT::f16); 2000 } 2001 2002 bool AMDGPUOperand::isSDWAFP32Operand() const { 2003 return isSDWAOperand(MVT::f32); 2004 } 2005 2006 bool AMDGPUOperand::isSDWAInt16Operand() const { 2007 return isSDWAOperand(MVT::i16); 2008 } 2009 2010 bool AMDGPUOperand::isSDWAInt32Operand() const { 2011 return isSDWAOperand(MVT::i32); 2012 } 2013 2014 bool AMDGPUOperand::isBoolReg() const { 2015 auto FB = AsmParser->getFeatureBits(); 2016 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2017 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2018 } 2019 2020 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2021 { 2022 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2023 assert(Size == 2 || Size == 4 || Size == 8); 2024 2025 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2026 2027 if (Imm.Mods.Abs) { 2028 Val &= ~FpSignMask; 2029 } 2030 if (Imm.Mods.Neg) { 2031 Val ^= FpSignMask; 2032 } 2033 2034 return Val; 2035 } 2036 2037 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2038 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2039 Inst.getNumOperands())) { 2040 addLiteralImmOperand(Inst, Imm.Val, 2041 ApplyModifiers & 2042 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2043 } else { 2044 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2045 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2046 setImmKindNone(); 2047 } 2048 } 2049 2050 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2051 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2052 auto OpNum = Inst.getNumOperands(); 2053 // Check that this operand accepts literals 2054 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2055 2056 if (ApplyModifiers) { 2057 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2058 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2059 Val = applyInputFPModifiers(Val, Size); 2060 } 2061 2062 APInt Literal(64, Val); 2063 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2064 2065 if (Imm.IsFPImm) { // We got fp literal token 2066 switch (OpTy) { 2067 case AMDGPU::OPERAND_REG_IMM_INT64: 2068 case AMDGPU::OPERAND_REG_IMM_FP64: 2069 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2070 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2071 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2072 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2073 AsmParser->hasInv2PiInlineImm())) { 2074 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2075 setImmKindConst(); 2076 return; 2077 } 2078 2079 // Non-inlineable 2080 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2081 // For fp operands we check if low 32 bits are zeros 2082 if (Literal.getLoBits(32) != 0) { 2083 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2084 "Can't encode literal as exact 64-bit floating-point operand. " 2085 "Low 32-bits will be set to zero"); 2086 } 2087 2088 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2089 setImmKindLiteral(); 2090 return; 2091 } 2092 2093 // We don't allow fp literals in 64-bit integer instructions. It is 2094 // unclear how we should encode them. This case should be checked earlier 2095 // in predicate methods (isLiteralImm()) 2096 llvm_unreachable("fp literal in 64-bit integer instruction."); 2097 2098 case AMDGPU::OPERAND_REG_IMM_INT32: 2099 case AMDGPU::OPERAND_REG_IMM_FP32: 2100 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2101 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2102 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2103 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2104 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2105 case AMDGPU::OPERAND_REG_IMM_INT16: 2106 case AMDGPU::OPERAND_REG_IMM_FP16: 2107 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2108 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2109 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2110 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2111 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2112 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2113 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2114 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2116 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2117 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2118 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2119 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2120 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2121 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2122 case AMDGPU::OPERAND_KIMM32: 2123 case AMDGPU::OPERAND_KIMM16: { 2124 bool lost; 2125 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2126 // Convert literal to single precision 2127 FPLiteral.convert(*getOpFltSemantics(OpTy), 2128 APFloat::rmNearestTiesToEven, &lost); 2129 // We allow precision lost but not overflow or underflow. This should be 2130 // checked earlier in isLiteralImm() 2131 2132 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2133 Inst.addOperand(MCOperand::createImm(ImmVal)); 2134 setImmKindLiteral(); 2135 return; 2136 } 2137 default: 2138 llvm_unreachable("invalid operand size"); 2139 } 2140 2141 return; 2142 } 2143 2144 // We got int literal token. 2145 // Only sign extend inline immediates. 
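// For example, with a 32-bit operand the token -1 falls in the inline range
// [-16, 64] and is encoded as an inline constant, while 0x12345678 does not
// and is emitted as a separate 32-bit literal.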
2146 switch (OpTy) { 2147 case AMDGPU::OPERAND_REG_IMM_INT32: 2148 case AMDGPU::OPERAND_REG_IMM_FP32: 2149 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2150 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2151 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2152 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2153 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2154 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2155 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2156 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2157 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2158 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2159 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2160 if (isSafeTruncation(Val, 32) && 2161 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2162 AsmParser->hasInv2PiInlineImm())) { 2163 Inst.addOperand(MCOperand::createImm(Val)); 2164 setImmKindConst(); 2165 return; 2166 } 2167 2168 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2169 setImmKindLiteral(); 2170 return; 2171 2172 case AMDGPU::OPERAND_REG_IMM_INT64: 2173 case AMDGPU::OPERAND_REG_IMM_FP64: 2174 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2175 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2176 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2177 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2178 Inst.addOperand(MCOperand::createImm(Val)); 2179 setImmKindConst(); 2180 return; 2181 } 2182 2183 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2184 setImmKindLiteral(); 2185 return; 2186 2187 case AMDGPU::OPERAND_REG_IMM_INT16: 2188 case AMDGPU::OPERAND_REG_IMM_FP16: 2189 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2190 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2191 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2192 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2193 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2194 if (isSafeTruncation(Val, 16) && 2195 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2196 AsmParser->hasInv2PiInlineImm())) { 2197 Inst.addOperand(MCOperand::createImm(Val)); 2198 setImmKindConst(); 2199 return; 2200 } 2201 2202 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2203 setImmKindLiteral(); 2204 return; 2205 2206 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2207 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2208 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2209 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2210 assert(isSafeTruncation(Val, 16)); 2211 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2212 AsmParser->hasInv2PiInlineImm())); 2213 2214 Inst.addOperand(MCOperand::createImm(Val)); 2215 return; 2216 } 2217 case AMDGPU::OPERAND_KIMM32: 2218 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2219 setImmKindNone(); 2220 return; 2221 case AMDGPU::OPERAND_KIMM16: 2222 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2223 setImmKindNone(); 2224 return; 2225 default: 2226 llvm_unreachable("invalid operand size"); 2227 } 2228 } 2229 2230 template <unsigned Bitwidth> 2231 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2232 APInt Literal(64, Imm.Val); 2233 setImmKindNone(); 2234 2235 if (!Imm.IsFPImm) { 2236 // We got int literal token. 
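// E.g. with Bitwidth == 16, an integer token such as 0x12345 is truncated
// to its low 16 bits (0x2345) here, while an fp token is rounded to the
// target format below (2.5 becomes the f16 pattern 0x4100).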
2237 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2238 return; 2239 } 2240 2241 bool Lost; 2242 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2243 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2244 APFloat::rmNearestTiesToEven, &Lost); 2245 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2246 } 2247 2248 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2249 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2250 } 2251 2252 static bool isInlineValue(unsigned Reg) { 2253 switch (Reg) { 2254 case AMDGPU::SRC_SHARED_BASE: 2255 case AMDGPU::SRC_SHARED_LIMIT: 2256 case AMDGPU::SRC_PRIVATE_BASE: 2257 case AMDGPU::SRC_PRIVATE_LIMIT: 2258 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2259 return true; 2260 case AMDGPU::SRC_VCCZ: 2261 case AMDGPU::SRC_EXECZ: 2262 case AMDGPU::SRC_SCC: 2263 return true; 2264 case AMDGPU::SGPR_NULL: 2265 return true; 2266 default: 2267 return false; 2268 } 2269 } 2270 2271 bool AMDGPUOperand::isInlineValue() const { 2272 return isRegKind() && ::isInlineValue(getReg()); 2273 } 2274 2275 //===----------------------------------------------------------------------===// 2276 // AsmParser 2277 //===----------------------------------------------------------------------===// 2278 2279 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2280 if (Is == IS_VGPR) { 2281 switch (RegWidth) { 2282 default: return -1; 2283 case 32: 2284 return AMDGPU::VGPR_32RegClassID; 2285 case 64: 2286 return AMDGPU::VReg_64RegClassID; 2287 case 96: 2288 return AMDGPU::VReg_96RegClassID; 2289 case 128: 2290 return AMDGPU::VReg_128RegClassID; 2291 case 160: 2292 return AMDGPU::VReg_160RegClassID; 2293 case 192: 2294 return AMDGPU::VReg_192RegClassID; 2295 case 224: 2296 return AMDGPU::VReg_224RegClassID; 2297 case 256: 2298 return AMDGPU::VReg_256RegClassID; 2299 case 512: 2300 return AMDGPU::VReg_512RegClassID; 2301 case 1024: 2302 return AMDGPU::VReg_1024RegClassID; 2303 } 2304 } else if (Is == IS_TTMP) { 2305 switch (RegWidth) { 2306 default: return -1; 2307 case 32: 2308 return AMDGPU::TTMP_32RegClassID; 2309 case 64: 2310 return AMDGPU::TTMP_64RegClassID; 2311 case 128: 2312 return AMDGPU::TTMP_128RegClassID; 2313 case 256: 2314 return AMDGPU::TTMP_256RegClassID; 2315 case 512: 2316 return AMDGPU::TTMP_512RegClassID; 2317 } 2318 } else if (Is == IS_SGPR) { 2319 switch (RegWidth) { 2320 default: return -1; 2321 case 32: 2322 return AMDGPU::SGPR_32RegClassID; 2323 case 64: 2324 return AMDGPU::SGPR_64RegClassID; 2325 case 96: 2326 return AMDGPU::SGPR_96RegClassID; 2327 case 128: 2328 return AMDGPU::SGPR_128RegClassID; 2329 case 160: 2330 return AMDGPU::SGPR_160RegClassID; 2331 case 192: 2332 return AMDGPU::SGPR_192RegClassID; 2333 case 224: 2334 return AMDGPU::SGPR_224RegClassID; 2335 case 256: 2336 return AMDGPU::SGPR_256RegClassID; 2337 case 512: 2338 return AMDGPU::SGPR_512RegClassID; 2339 } 2340 } else if (Is == IS_AGPR) { 2341 switch (RegWidth) { 2342 default: return -1; 2343 case 32: 2344 return AMDGPU::AGPR_32RegClassID; 2345 case 64: 2346 return AMDGPU::AReg_64RegClassID; 2347 case 96: 2348 return AMDGPU::AReg_96RegClassID; 2349 case 128: 2350 return AMDGPU::AReg_128RegClassID; 2351 case 160: 2352 return AMDGPU::AReg_160RegClassID; 2353 case 192: 2354 return AMDGPU::AReg_192RegClassID; 2355 case 224: 2356 return AMDGPU::AReg_224RegClassID; 2357 case 256: 2358 return AMDGPU::AReg_256RegClassID; 2359 case 512: 2360 return AMDGPU::AReg_512RegClassID; 
2361 case 1024: 2362 return AMDGPU::AReg_1024RegClassID; 2363 } 2364 } 2365 return -1; 2366 } 2367 2368 static unsigned getSpecialRegForName(StringRef RegName) { 2369 return StringSwitch<unsigned>(RegName) 2370 .Case("exec", AMDGPU::EXEC) 2371 .Case("vcc", AMDGPU::VCC) 2372 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2373 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2374 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2375 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2376 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2377 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2378 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2379 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2380 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2381 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2382 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2383 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2384 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2385 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2386 .Case("m0", AMDGPU::M0) 2387 .Case("vccz", AMDGPU::SRC_VCCZ) 2388 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2389 .Case("execz", AMDGPU::SRC_EXECZ) 2390 .Case("src_execz", AMDGPU::SRC_EXECZ) 2391 .Case("scc", AMDGPU::SRC_SCC) 2392 .Case("src_scc", AMDGPU::SRC_SCC) 2393 .Case("tba", AMDGPU::TBA) 2394 .Case("tma", AMDGPU::TMA) 2395 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2396 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2397 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2398 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2399 .Case("vcc_lo", AMDGPU::VCC_LO) 2400 .Case("vcc_hi", AMDGPU::VCC_HI) 2401 .Case("exec_lo", AMDGPU::EXEC_LO) 2402 .Case("exec_hi", AMDGPU::EXEC_HI) 2403 .Case("tma_lo", AMDGPU::TMA_LO) 2404 .Case("tma_hi", AMDGPU::TMA_HI) 2405 .Case("tba_lo", AMDGPU::TBA_LO) 2406 .Case("tba_hi", AMDGPU::TBA_HI) 2407 .Case("pc", AMDGPU::PC_REG) 2408 .Case("null", AMDGPU::SGPR_NULL) 2409 .Default(AMDGPU::NoRegister); 2410 } 2411 2412 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2413 SMLoc &EndLoc, bool RestoreOnFailure) { 2414 auto R = parseRegister(); 2415 if (!R) return true; 2416 assert(R->isReg()); 2417 RegNo = R->getReg(); 2418 StartLoc = R->getStartLoc(); 2419 EndLoc = R->getEndLoc(); 2420 return false; 2421 } 2422 2423 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2424 SMLoc &EndLoc) { 2425 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2426 } 2427 2428 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2429 SMLoc &StartLoc, 2430 SMLoc &EndLoc) { 2431 bool Result = 2432 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2433 bool PendingErrors = getParser().hasPendingError(); 2434 getParser().clearPendingErrors(); 2435 if (PendingErrors) 2436 return MatchOperand_ParseFail; 2437 if (Result) 2438 return MatchOperand_NoMatch; 2439 return MatchOperand_Success; 2440 } 2441 2442 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2443 RegisterKind RegKind, unsigned Reg1, 2444 SMLoc Loc) { 2445 switch (RegKind) { 2446 case IS_SPECIAL: 2447 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2448 Reg = AMDGPU::EXEC; 2449 RegWidth = 64; 2450 return true; 2451 } 2452 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2453 Reg = AMDGPU::FLAT_SCR; 2454 RegWidth = 64; 2455 return true; 2456 } 2457 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2458 Reg = AMDGPU::XNACK_MASK; 2459 RegWidth = 64; 
2460 return true; 2461 } 2462 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2463 Reg = AMDGPU::VCC; 2464 RegWidth = 64; 2465 return true; 2466 } 2467 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2468 Reg = AMDGPU::TBA; 2469 RegWidth = 64; 2470 return true; 2471 } 2472 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2473 Reg = AMDGPU::TMA; 2474 RegWidth = 64; 2475 return true; 2476 } 2477 Error(Loc, "register does not fit in the list"); 2478 return false; 2479 case IS_VGPR: 2480 case IS_SGPR: 2481 case IS_AGPR: 2482 case IS_TTMP: 2483 if (Reg1 != Reg + RegWidth / 32) { 2484 Error(Loc, "registers in a list must have consecutive indices"); 2485 return false; 2486 } 2487 RegWidth += 32; 2488 return true; 2489 default: 2490 llvm_unreachable("unexpected register kind"); 2491 } 2492 } 2493 2494 struct RegInfo { 2495 StringLiteral Name; 2496 RegisterKind Kind; 2497 }; 2498 2499 static constexpr RegInfo RegularRegisters[] = { 2500 {{"v"}, IS_VGPR}, 2501 {{"s"}, IS_SGPR}, 2502 {{"ttmp"}, IS_TTMP}, 2503 {{"acc"}, IS_AGPR}, 2504 {{"a"}, IS_AGPR}, 2505 }; 2506 2507 static bool isRegularReg(RegisterKind Kind) { 2508 return Kind == IS_VGPR || 2509 Kind == IS_SGPR || 2510 Kind == IS_TTMP || 2511 Kind == IS_AGPR; 2512 } 2513 2514 static const RegInfo* getRegularRegInfo(StringRef Str) { 2515 for (const RegInfo &Reg : RegularRegisters) 2516 if (Str.startswith(Reg.Name)) 2517 return &Reg; 2518 return nullptr; 2519 } 2520 2521 static bool getRegNum(StringRef Str, unsigned& Num) { 2522 return !Str.getAsInteger(10, Num); 2523 } 2524 2525 bool 2526 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2527 const AsmToken &NextToken) const { 2528 2529 // A list of consecutive registers: [s0,s1,s2,s3] 2530 if (Token.is(AsmToken::LBrac)) 2531 return true; 2532 2533 if (!Token.is(AsmToken::Identifier)) 2534 return false; 2535 2536 // A single register like s0 or a range of registers like s[0:1] 2537 2538 StringRef Str = Token.getString(); 2539 const RegInfo *Reg = getRegularRegInfo(Str); 2540 if (Reg) { 2541 StringRef RegName = Reg->Name; 2542 StringRef RegSuffix = Str.substr(RegName.size()); 2543 if (!RegSuffix.empty()) { 2544 unsigned Num; 2545 // A single register with an index: rXX 2546 if (getRegNum(RegSuffix, Num)) 2547 return true; 2548 } else { 2549 // A range of registers: r[XX:YY]. 2550 if (NextToken.is(AsmToken::LBrac)) 2551 return true; 2552 } 2553 } 2554 2555 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2556 } 2557 2558 bool 2559 AMDGPUAsmParser::isRegister() 2560 { 2561 return isRegister(getToken(), peekToken()); 2562 } 2563 2564 unsigned 2565 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2566 unsigned RegNum, 2567 unsigned RegWidth, 2568 SMLoc Loc) { 2569 2570 assert(isRegularReg(RegKind)); 2571 2572 unsigned AlignSize = 1; 2573 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2574 // SGPR and TTMP registers must be aligned. 2575 // Max required alignment is 4 dwords. 
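// For example, a 64-bit SGPR pair must start at an even index (s[2:3] is
// accepted, s[1:2] is not), and tuples of 128 bits or more must start at a
// multiple of 4 (s[4:7] is accepted, s[2:5] is not).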
2576 AlignSize = std::min(RegWidth / 32, 4u); 2577 } 2578 2579 if (RegNum % AlignSize != 0) { 2580 Error(Loc, "invalid register alignment"); 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 unsigned RegIdx = RegNum / AlignSize; 2585 int RCID = getRegClass(RegKind, RegWidth); 2586 if (RCID == -1) { 2587 Error(Loc, "invalid or unsupported register size"); 2588 return AMDGPU::NoRegister; 2589 } 2590 2591 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2592 const MCRegisterClass RC = TRI->getRegClass(RCID); 2593 if (RegIdx >= RC.getNumRegs()) { 2594 Error(Loc, "register index is out of range"); 2595 return AMDGPU::NoRegister; 2596 } 2597 2598 return RC.getRegister(RegIdx); 2599 } 2600 2601 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2602 int64_t RegLo, RegHi; 2603 if (!skipToken(AsmToken::LBrac, "missing register index")) 2604 return false; 2605 2606 SMLoc FirstIdxLoc = getLoc(); 2607 SMLoc SecondIdxLoc; 2608 2609 if (!parseExpr(RegLo)) 2610 return false; 2611 2612 if (trySkipToken(AsmToken::Colon)) { 2613 SecondIdxLoc = getLoc(); 2614 if (!parseExpr(RegHi)) 2615 return false; 2616 } else { 2617 RegHi = RegLo; 2618 } 2619 2620 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2621 return false; 2622 2623 if (!isUInt<32>(RegLo)) { 2624 Error(FirstIdxLoc, "invalid register index"); 2625 return false; 2626 } 2627 2628 if (!isUInt<32>(RegHi)) { 2629 Error(SecondIdxLoc, "invalid register index"); 2630 return false; 2631 } 2632 2633 if (RegLo > RegHi) { 2634 Error(FirstIdxLoc, "first register index should not exceed second index"); 2635 return false; 2636 } 2637 2638 Num = static_cast<unsigned>(RegLo); 2639 RegWidth = 32 * ((RegHi - RegLo) + 1); 2640 return true; 2641 } 2642 2643 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2644 unsigned &RegNum, unsigned &RegWidth, 2645 SmallVectorImpl<AsmToken> &Tokens) { 2646 assert(isToken(AsmToken::Identifier)); 2647 unsigned Reg = getSpecialRegForName(getTokenStr()); 2648 if (Reg) { 2649 RegNum = 0; 2650 RegWidth = 32; 2651 RegKind = IS_SPECIAL; 2652 Tokens.push_back(getToken()); 2653 lex(); // skip register name 2654 } 2655 return Reg; 2656 } 2657 2658 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2659 unsigned &RegNum, unsigned &RegWidth, 2660 SmallVectorImpl<AsmToken> &Tokens) { 2661 assert(isToken(AsmToken::Identifier)); 2662 StringRef RegName = getTokenStr(); 2663 auto Loc = getLoc(); 2664 2665 const RegInfo *RI = getRegularRegInfo(RegName); 2666 if (!RI) { 2667 Error(Loc, "invalid register name"); 2668 return AMDGPU::NoRegister; 2669 } 2670 2671 Tokens.push_back(getToken()); 2672 lex(); // skip register name 2673 2674 RegKind = RI->Kind; 2675 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2676 if (!RegSuffix.empty()) { 2677 // Single 32-bit register: vXX. 2678 if (!getRegNum(RegSuffix, RegNum)) { 2679 Error(Loc, "invalid register index"); 2680 return AMDGPU::NoRegister; 2681 } 2682 RegWidth = 32; 2683 } else { 2684 // Range of registers: v[XX:YY]. ":YY" is optional. 
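// E.g. "v[4:7]" yields RegNum == 4 and RegWidth == 128, and "v[4]" is
// equivalent to "v4" (RegWidth == 32).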
2685 if (!ParseRegRange(RegNum, RegWidth)) 2686 return AMDGPU::NoRegister; 2687 } 2688 2689 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2690 } 2691 2692 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2693 unsigned &RegWidth, 2694 SmallVectorImpl<AsmToken> &Tokens) { 2695 unsigned Reg = AMDGPU::NoRegister; 2696 auto ListLoc = getLoc(); 2697 2698 if (!skipToken(AsmToken::LBrac, 2699 "expected a register or a list of registers")) { 2700 return AMDGPU::NoRegister; 2701 } 2702 2703 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2704 2705 auto Loc = getLoc(); 2706 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2707 return AMDGPU::NoRegister; 2708 if (RegWidth != 32) { 2709 Error(Loc, "expected a single 32-bit register"); 2710 return AMDGPU::NoRegister; 2711 } 2712 2713 for (; trySkipToken(AsmToken::Comma); ) { 2714 RegisterKind NextRegKind; 2715 unsigned NextReg, NextRegNum, NextRegWidth; 2716 Loc = getLoc(); 2717 2718 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2719 NextRegNum, NextRegWidth, 2720 Tokens)) { 2721 return AMDGPU::NoRegister; 2722 } 2723 if (NextRegWidth != 32) { 2724 Error(Loc, "expected a single 32-bit register"); 2725 return AMDGPU::NoRegister; 2726 } 2727 if (NextRegKind != RegKind) { 2728 Error(Loc, "registers in a list must be of the same kind"); 2729 return AMDGPU::NoRegister; 2730 } 2731 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2732 return AMDGPU::NoRegister; 2733 } 2734 2735 if (!skipToken(AsmToken::RBrac, 2736 "expected a comma or a closing square bracket")) { 2737 return AMDGPU::NoRegister; 2738 } 2739 2740 if (isRegularReg(RegKind)) 2741 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2742 2743 return Reg; 2744 } 2745 2746 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2747 unsigned &RegNum, unsigned &RegWidth, 2748 SmallVectorImpl<AsmToken> &Tokens) { 2749 auto Loc = getLoc(); 2750 Reg = AMDGPU::NoRegister; 2751 2752 if (isToken(AsmToken::Identifier)) { 2753 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2754 if (Reg == AMDGPU::NoRegister) 2755 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2756 } else { 2757 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2758 } 2759 2760 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2761 if (Reg == AMDGPU::NoRegister) { 2762 assert(Parser.hasPendingError()); 2763 return false; 2764 } 2765 2766 if (!subtargetHasRegister(*TRI, Reg)) { 2767 if (Reg == AMDGPU::SGPR_NULL) { 2768 Error(Loc, "'null' operand is not supported on this GPU"); 2769 } else { 2770 Error(Loc, "register not available on this GPU"); 2771 } 2772 return false; 2773 } 2774 2775 return true; 2776 } 2777 2778 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2779 unsigned &RegNum, unsigned &RegWidth, 2780 bool RestoreOnFailure /*=false*/) { 2781 Reg = AMDGPU::NoRegister; 2782 2783 SmallVector<AsmToken, 1> Tokens; 2784 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2785 if (RestoreOnFailure) { 2786 while (!Tokens.empty()) { 2787 getLexer().UnLex(Tokens.pop_back_val()); 2788 } 2789 } 2790 return true; 2791 } 2792 return false; 2793 } 2794 2795 Optional<StringRef> 2796 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2797 switch (RegKind) { 2798 case IS_VGPR: 2799 return StringRef(".amdgcn.next_free_vgpr"); 2800 case IS_SGPR: 2801 return StringRef(".amdgcn.next_free_sgpr"); 2802 default: 2803 return None; 2804 } 2805 } 2806 2807 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2808 auto SymbolName = getGprCountSymbolName(RegKind); 2809 assert(SymbolName && "initializing invalid register kind"); 2810 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2811 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2812 } 2813 2814 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2815 unsigned DwordRegIndex, 2816 unsigned RegWidth) { 2817 // Symbols are only defined for GCN targets 2818 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2819 return true; 2820 2821 auto SymbolName = getGprCountSymbolName(RegKind); 2822 if (!SymbolName) 2823 return true; 2824 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2825 2826 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2827 int64_t OldCount; 2828 2829 if (!Sym->isVariable()) 2830 return !Error(getLoc(), 2831 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2832 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2833 return !Error( 2834 getLoc(), 2835 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2836 2837 if (OldCount <= NewMax) 2838 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2839 2840 return true; 2841 } 2842 2843 std::unique_ptr<AMDGPUOperand> 2844 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2845 const auto &Tok = getToken(); 2846 SMLoc StartLoc = Tok.getLoc(); 2847 SMLoc EndLoc = Tok.getEndLoc(); 2848 RegisterKind RegKind; 2849 unsigned Reg, RegNum, RegWidth; 2850 2851 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2852 return nullptr; 2853 } 2854 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2855 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2856 return nullptr; 2857 } else 2858 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2859 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2860 } 2861 2862 OperandMatchResultTy 2863 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2864 // TODO: add syntactic sugar for 1/(2*PI) 2865 2866 assert(!isRegister()); 2867 assert(!isModifier()); 2868 2869 const auto& Tok = getToken(); 2870 const auto& NextTok = peekToken(); 2871 bool IsReal = Tok.is(AsmToken::Real); 2872 SMLoc S = getLoc(); 2873 bool Negate = false; 2874 2875 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2876 lex(); 2877 IsReal = true; 2878 Negate = true; 2879 } 2880 2881 if (IsReal) { 2882 // Floating-point expressions are not supported. 2883 // Can only allow floating-point literals with an 2884 // optional sign. 2885 2886 StringRef Num = getTokenStr(); 2887 lex(); 2888 2889 APFloat RealVal(APFloat::IEEEdouble()); 2890 auto roundMode = APFloat::rmNearestTiesToEven; 2891 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2892 return MatchOperand_ParseFail; 2893 } 2894 if (Negate) 2895 RealVal.changeSign(); 2896 2897 Operands.push_back( 2898 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2899 AMDGPUOperand::ImmTyNone, true)); 2900 2901 return MatchOperand_Success; 2902 2903 } else { 2904 int64_t IntVal; 2905 const MCExpr *Expr; 2906 SMLoc S = getLoc(); 2907 2908 if (HasSP3AbsModifier) { 2909 // This is a workaround for handling expressions 2910 // as arguments of SP3 'abs' modifier, for example: 2911 // |1.0| 2912 // |-1| 2913 // |1+x| 2914 // This syntax is not compatible with syntax of standard 2915 // MC expressions (due to the trailing '|'). 
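// parsePrimaryExpr() is used below because parseExpression() would treat
// the trailing '|' as the binary OR operator and try to consume it.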
2916 SMLoc EndLoc; 2917 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2918 return MatchOperand_ParseFail; 2919 } else { 2920 if (Parser.parseExpression(Expr)) 2921 return MatchOperand_ParseFail; 2922 } 2923 2924 if (Expr->evaluateAsAbsolute(IntVal)) { 2925 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2926 } else { 2927 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2928 } 2929 2930 return MatchOperand_Success; 2931 } 2932 2933 return MatchOperand_NoMatch; 2934 } 2935 2936 OperandMatchResultTy 2937 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2938 if (!isRegister()) 2939 return MatchOperand_NoMatch; 2940 2941 if (auto R = parseRegister()) { 2942 assert(R->isReg()); 2943 Operands.push_back(std::move(R)); 2944 return MatchOperand_Success; 2945 } 2946 return MatchOperand_ParseFail; 2947 } 2948 2949 OperandMatchResultTy 2950 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2951 auto res = parseReg(Operands); 2952 if (res != MatchOperand_NoMatch) { 2953 return res; 2954 } else if (isModifier()) { 2955 return MatchOperand_NoMatch; 2956 } else { 2957 return parseImm(Operands, HasSP3AbsMod); 2958 } 2959 } 2960 2961 bool 2962 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2963 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2964 const auto &str = Token.getString(); 2965 return str == "abs" || str == "neg" || str == "sext"; 2966 } 2967 return false; 2968 } 2969 2970 bool 2971 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2972 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2973 } 2974 2975 bool 2976 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2977 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2978 } 2979 2980 bool 2981 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2982 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2983 } 2984 2985 // Check if this is an operand modifier or an opcode modifier 2986 // which may look like an expression but it is not. We should 2987 // avoid parsing these modifiers as expressions. Currently 2988 // recognized sequences are: 2989 // |...| 2990 // abs(...) 2991 // neg(...) 2992 // sext(...) 2993 // -reg 2994 // -|...| 2995 // -abs(...) 2996 // name:... 2997 // Note that simple opcode modifiers like 'gds' may be parsed as 2998 // expressions; this is a special case. See getExpressionAsToken. 2999 // 3000 bool 3001 AMDGPUAsmParser::isModifier() { 3002 3003 AsmToken Tok = getToken(); 3004 AsmToken NextToken[2]; 3005 peekTokens(NextToken); 3006 3007 return isOperandModifier(Tok, NextToken[0]) || 3008 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 3009 isOpcodeModifierWithVal(Tok, NextToken[0]); 3010 } 3011 3012 // Check if the current token is an SP3 'neg' modifier. 3013 // Currently this modifier is allowed in the following context: 3014 // 3015 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3016 // 2. Before an 'abs' modifier: -abs(...) 3017 // 3. Before an SP3 'abs' modifier: -|...| 3018 // 3019 // In all other cases "-" is handled as a part 3020 // of an expression that follows the sign. 
3021 // 3022 // Note: When "-" is followed by an integer literal, 3023 // this is interpreted as integer negation rather 3024 // than a floating-point NEG modifier applied to N. 3025 // Besides being counter-intuitive, such use of the floating-point 3026 // NEG modifier would have resulted in different meanings 3027 // of integer literals used with VOP1/2/C and VOP3, 3028 // for example: 3029 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 3030 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 3031 // Negative fp literals with a preceding "-" are 3032 // handled likewise, for uniformity. 3033 // 3034 bool 3035 AMDGPUAsmParser::parseSP3NegModifier() { 3036 3037 AsmToken NextToken[2]; 3038 peekTokens(NextToken); 3039 3040 if (isToken(AsmToken::Minus) && 3041 (isRegister(NextToken[0], NextToken[1]) || 3042 NextToken[0].is(AsmToken::Pipe) || 3043 isId(NextToken[0], "abs"))) { 3044 lex(); 3045 return true; 3046 } 3047 3048 return false; 3049 } 3050 3051 OperandMatchResultTy 3052 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 3053 bool AllowImm) { 3054 bool Neg, SP3Neg; 3055 bool Abs, SP3Abs; 3056 SMLoc Loc; 3057 3058 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 3059 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 3060 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 3061 return MatchOperand_ParseFail; 3062 } 3063 3064 SP3Neg = parseSP3NegModifier(); 3065 3066 Loc = getLoc(); 3067 Neg = trySkipId("neg"); 3068 if (Neg && SP3Neg) { 3069 Error(Loc, "expected register or immediate"); 3070 return MatchOperand_ParseFail; 3071 } 3072 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 3073 return MatchOperand_ParseFail; 3074 3075 Abs = trySkipId("abs"); 3076 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 3077 return MatchOperand_ParseFail; 3078 3079 Loc = getLoc(); 3080 SP3Abs = trySkipToken(AsmToken::Pipe); 3081 if (Abs && SP3Abs) { 3082 Error(Loc, "expected register or immediate"); 3083 return MatchOperand_ParseFail; 3084 } 3085 3086 OperandMatchResultTy Res; 3087 if (AllowImm) { 3088 Res = parseRegOrImm(Operands, SP3Abs); 3089 } else { 3090 Res = parseReg(Operands); 3091 } 3092 if (Res != MatchOperand_Success) { 3093 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 3094 } 3095 3096 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3097 return MatchOperand_ParseFail; 3098 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3099 return MatchOperand_ParseFail; 3100 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3101 return MatchOperand_ParseFail; 3102 3103 AMDGPUOperand::Modifiers Mods; 3104 Mods.Abs = Abs || SP3Abs; 3105 Mods.Neg = Neg || SP3Neg; 3106 3107 if (Mods.hasFPModifiers()) { 3108 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3109 if (Op.isExpr()) { 3110 Error(Op.getStartLoc(), "expected an absolute expression"); 3111 return MatchOperand_ParseFail; 3112 } 3113 Op.setModifiers(Mods); 3114 } 3115 return MatchOperand_Success; 3116 } 3117 3118 OperandMatchResultTy 3119 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3120 bool AllowImm) { 3121 bool Sext = trySkipId("sext"); 3122 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3123 return MatchOperand_ParseFail; 3124 3125 OperandMatchResultTy Res; 3126 if (AllowImm) { 3127 Res = parseRegOrImm(Operands); 3128 } else { 3129 Res = parseReg(Operands); 3130 } 3131 if (Res != MatchOperand_Success) { 3132 return Sext? MatchOperand_ParseFail : Res; 3133 } 3134 3135 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3136 return MatchOperand_ParseFail; 3137 3138 AMDGPUOperand::Modifiers Mods; 3139 Mods.Sext = Sext; 3140 3141 if (Mods.hasIntModifiers()) { 3142 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3143 if (Op.isExpr()) { 3144 Error(Op.getStartLoc(), "expected an absolute expression"); 3145 return MatchOperand_ParseFail; 3146 } 3147 Op.setModifiers(Mods); 3148 } 3149 3150 return MatchOperand_Success; 3151 } 3152 3153 OperandMatchResultTy 3154 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3155 return parseRegOrImmWithFPInputMods(Operands, false); 3156 } 3157 3158 OperandMatchResultTy 3159 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3160 return parseRegOrImmWithIntInputMods(Operands, false); 3161 } 3162 3163 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3164 auto Loc = getLoc(); 3165 if (trySkipId("off")) { 3166 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3167 AMDGPUOperand::ImmTyOff, false)); 3168 return MatchOperand_Success; 3169 } 3170 3171 if (!isRegister()) 3172 return MatchOperand_NoMatch; 3173 3174 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3175 if (Reg) { 3176 Operands.push_back(std::move(Reg)); 3177 return MatchOperand_Success; 3178 } 3179 3180 return MatchOperand_ParseFail; 3181 3182 } 3183 3184 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3185 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3186 3187 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3188 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3189 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3190 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3191 return Match_InvalidOperand; 3192 3193 if ((TSFlags & SIInstrFlags::VOP3) && 3194 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3195 getForcedEncodingSize() != 64) 3196 return Match_PreferE32; 3197 3198 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3199 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3200 // v_mac_f32/16 allow only dst_sel == DWORD; 3201 auto OpNum = 3202 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3203 const auto &Op = Inst.getOperand(OpNum); 3204 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3205 return Match_InvalidOperand; 3206 } 3207 } 3208 3209 return Match_Success; 3210 } 3211 3212 static ArrayRef<unsigned> getAllVariants() { 3213 static const unsigned Variants[] = { 3214 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3215 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3216 }; 3217 3218 return makeArrayRef(Variants); 3219 } 3220 3221 // What asm variants we should check 3222 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3223 if (getForcedEncodingSize() == 32) { 3224 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3225 return makeArrayRef(Variants); 3226 } 3227 3228 if (isForcedVOP3()) { 3229 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3230 return makeArrayRef(Variants); 3231 } 3232 3233 if (isForcedSDWA()) { 3234 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3235 AMDGPUAsmVariants::SDWA9}; 3236 return makeArrayRef(Variants); 3237 } 3238 3239 if (isForcedDPP()) { 3240 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3241 return makeArrayRef(Variants); 3242 } 3243 3244 return getAllVariants(); 3245 } 3246 3247 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3248 if (getForcedEncodingSize() == 32) 3249 return "e32"; 3250 3251 if (isForcedVOP3()) 3252 return "e64"; 3253 3254 if (isForcedSDWA()) 3255 return "sdwa"; 3256 3257 if (isForcedDPP()) 3258 return "dpp"; 3259 3260 return ""; 3261 } 3262 3263 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3264 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3265 const unsigned Num = Desc.getNumImplicitUses(); 3266 for (unsigned i = 0; i < Num; ++i) { 3267 unsigned Reg = Desc.ImplicitUses[i]; 3268 switch (Reg) { 3269 case AMDGPU::FLAT_SCR: 3270 case AMDGPU::VCC: 3271 case AMDGPU::VCC_LO: 3272 case AMDGPU::VCC_HI: 3273 case AMDGPU::M0: 3274 return Reg; 3275 default: 3276 break; 3277 } 3278 } 3279 return AMDGPU::NoRegister; 3280 } 3281 3282 // NB: This code is correct only when used to check constant 3283 // bus limitations because GFX7 support no f16 inline constants. 3284 // Note that there are no cases when a GFX7 opcode violates 3285 // constant bus limitations due to the use of an f16 constant. 
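// For example, isInlinableLiteral16() below reports 0x3800 (f16 0.5) as
// inlinable even on GFX7; this is assumed to be harmless since GFX7 has no
// f16 operands that could reach this check.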
3286 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3287 unsigned OpIdx) const { 3288 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3289 3290 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3291 return false; 3292 } 3293 3294 const MCOperand &MO = Inst.getOperand(OpIdx); 3295 3296 int64_t Val = MO.getImm(); 3297 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3298 3299 switch (OpSize) { // expected operand size 3300 case 8: 3301 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3302 case 4: 3303 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3304 case 2: { 3305 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3306 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3307 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3308 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3309 return AMDGPU::isInlinableIntLiteral(Val); 3310 3311 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3312 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3313 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3314 return AMDGPU::isInlinableIntLiteralV216(Val); 3315 3316 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3317 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3318 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3319 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3320 3321 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3322 } 3323 default: 3324 llvm_unreachable("invalid operand size"); 3325 } 3326 } 3327 3328 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3329 if (!isGFX10Plus()) 3330 return 1; 3331 3332 switch (Opcode) { 3333 // 64-bit shift instructions can use only one scalar value input 3334 case AMDGPU::V_LSHLREV_B64_e64: 3335 case AMDGPU::V_LSHLREV_B64_gfx10: 3336 case AMDGPU::V_LSHRREV_B64_e64: 3337 case AMDGPU::V_LSHRREV_B64_gfx10: 3338 case AMDGPU::V_ASHRREV_I64_e64: 3339 case AMDGPU::V_ASHRREV_I64_gfx10: 3340 case AMDGPU::V_LSHL_B64_e64: 3341 case AMDGPU::V_LSHR_B64_e64: 3342 case AMDGPU::V_ASHR_I64_e64: 3343 return 1; 3344 default: 3345 return 2; 3346 } 3347 } 3348 3349 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3350 const MCOperand &MO = Inst.getOperand(OpIdx); 3351 if (MO.isImm()) { 3352 return !isInlineConstant(Inst, OpIdx); 3353 } else if (MO.isReg()) { 3354 auto Reg = MO.getReg(); 3355 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3356 auto PReg = mc2PseudoReg(Reg); 3357 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3358 } else { 3359 return true; 3360 } 3361 } 3362 3363 bool 3364 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3365 const OperandVector &Operands) { 3366 const unsigned Opcode = Inst.getOpcode(); 3367 const MCInstrDesc &Desc = MII.get(Opcode); 3368 unsigned LastSGPR = AMDGPU::NoRegister; 3369 unsigned ConstantBusUseCount = 0; 3370 unsigned NumLiterals = 0; 3371 unsigned LiteralSize; 3372 3373 if (Desc.TSFlags & 3374 (SIInstrFlags::VOPC | 3375 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3376 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3377 SIInstrFlags::SDWA)) { 3378 // Check special imm operands (used by madmk, etc) 3379 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3380 ++NumLiterals; 3381 LiteralSize = 4; 3382 } 3383 3384 SmallDenseSet<unsigned> SGPRsUsed; 3385 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3386 if (SGPRUsed != AMDGPU::NoRegister) { 3387 SGPRsUsed.insert(SGPRUsed); 3388 ++ConstantBusUseCount; 3389 } 3390 3391 
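// Note that an implicit read (e.g. of VCC) plus one explicit SGPR source
// already occupies two constant bus slots, which exceeds the pre-GFX10
// limit of one but is accepted where getConstantBusLimit() returns 2.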
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3392 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3393 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3394 3395 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3396 3397 for (int OpIdx : OpIndices) { 3398 if (OpIdx == -1) break; 3399 3400 const MCOperand &MO = Inst.getOperand(OpIdx); 3401 if (usesConstantBus(Inst, OpIdx)) { 3402 if (MO.isReg()) { 3403 LastSGPR = mc2PseudoReg(MO.getReg()); 3404 // Pairs of registers with partial intersections like these: 3405 // s0, s[0:1] 3406 // flat_scratch_lo, flat_scratch 3407 // flat_scratch_lo, flat_scratch_hi 3408 // are theoretically valid but are disabled anyway. 3409 // Note that this code mimics SIInstrInfo::verifyInstruction. 3410 if (!SGPRsUsed.count(LastSGPR)) { 3411 SGPRsUsed.insert(LastSGPR); 3412 ++ConstantBusUseCount; 3413 } 3414 } else { // Expression or a literal 3415 3416 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3417 continue; // special operand like VINTERP attr_chan 3418 3419 // An instruction may use only one literal; 3420 // this has been validated in the previous step 3421 // (see validateVOPLiteral). 3422 // The literal may be used as more than one operand. 3423 // If all these operands are of the same size, 3424 // the literal counts as one scalar value; 3425 // otherwise it counts as 2 scalar values. 3426 // See "GFX10 Shader Programming", section 3.6.2.3. 3427 3428 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3429 if (Size < 4) Size = 4; 3430 3431 if (NumLiterals == 0) { 3432 NumLiterals = 1; 3433 LiteralSize = Size; 3434 } else if (LiteralSize != Size) { 3435 NumLiterals = 2; 3436 } 3437 } 3438 } 3439 } 3440 } 3441 ConstantBusUseCount += NumLiterals; 3442 3443 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3444 return true; 3445 3446 SMLoc LitLoc = getLitLoc(Operands); 3447 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3448 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3449 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3450 return false; 3451 } 3452 3453 bool 3454 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3455 const OperandVector &Operands) { 3456 const unsigned Opcode = Inst.getOpcode(); 3457 const MCInstrDesc &Desc = MII.get(Opcode); 3458 3459 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3460 if (DstIdx == -1 || 3461 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3462 return true; 3463 } 3464 3465 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3466 3467 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3468 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3469 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3470 3471 assert(DstIdx != -1); 3472 const MCOperand &Dst = Inst.getOperand(DstIdx); 3473 assert(Dst.isReg()); 3474 3475 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3476 3477 for (int SrcIdx : SrcIndices) { 3478 if (SrcIdx == -1) break; 3479 const MCOperand &Src = Inst.getOperand(SrcIdx); 3480 if (Src.isReg()) { 3481 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3482 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3483 Error(getRegLoc(SrcReg, Operands), 3484 "destination must be different than all sources"); 3485 return false; 3486 } 3487 } 3488 } 3489 3490 return true; 3491 } 3492 3493 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3494 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3499 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3500 assert(ClampIdx != -1); 3501 return Inst.getOperand(ClampIdx).getImm() == 0; 3502 } 3503 3504 return true; 3505 } 3506 3507 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3508 3509 const unsigned Opc = Inst.getOpcode(); 3510 const MCInstrDesc &Desc = MII.get(Opc); 3511 3512 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3513 return None; 3514 3515 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3516 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3517 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3518 3519 assert(VDataIdx != -1); 3520 3521 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3522 return None; 3523 3524 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3525 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3526 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3527 if (DMask == 0) 3528 DMask = 1; 3529 3530 bool isPackedD16 = false; 3531 unsigned DataSize = 3532 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3533 if (hasPackedD16()) { 3534 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3535 isPackedD16 = D16Idx >= 0; 3536 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3537 DataSize = (DataSize + 1) / 2; 3538 } 3539 3540 if ((VDataSize / 4) == DataSize + TFESize) 3541 return None; 3542 3543 return StringRef(isPackedD16 3544 ? 
"image data size does not match dmask, d16 and tfe" 3545 : "image data size does not match dmask and tfe"); 3546 } 3547 3548 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3549 const unsigned Opc = Inst.getOpcode(); 3550 const MCInstrDesc &Desc = MII.get(Opc); 3551 3552 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3553 return true; 3554 3555 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3556 3557 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3558 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3559 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3560 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3561 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3562 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3563 3564 assert(VAddr0Idx != -1); 3565 assert(SrsrcIdx != -1); 3566 assert(SrsrcIdx > VAddr0Idx); 3567 3568 if (DimIdx == -1) 3569 return true; // intersect_ray 3570 3571 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3572 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3573 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3574 unsigned ActualAddrSize = 3575 IsNSA ? SrsrcIdx - VAddr0Idx 3576 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3577 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3578 3579 unsigned ExpectedAddrSize = 3580 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3581 3582 if (!IsNSA) { 3583 if (ExpectedAddrSize > 8) 3584 ExpectedAddrSize = 16; 3585 3586 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3587 // This provides backward compatibility for assembly created 3588 // before 160b/192b/224b types were directly supported. 3589 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3590 return true; 3591 } 3592 3593 return ActualAddrSize == ExpectedAddrSize; 3594 } 3595 3596 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3597 3598 const unsigned Opc = Inst.getOpcode(); 3599 const MCInstrDesc &Desc = MII.get(Opc); 3600 3601 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3602 return true; 3603 if (!Desc.mayLoad() || !Desc.mayStore()) 3604 return true; // Not atomic 3605 3606 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3607 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3608 3609 // This is an incomplete check because image_atomic_cmpswap 3610 // may only use 0x3 and 0xf while other atomic operations 3611 // may use 0x1 and 0x3. However these limitations are 3612 // verified when we check that dmask matches dst size. 3613 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3614 } 3615 3616 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3617 3618 const unsigned Opc = Inst.getOpcode(); 3619 const MCInstrDesc &Desc = MII.get(Opc); 3620 3621 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3622 return true; 3623 3624 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3625 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3626 3627 // GATHER4 instructions use dmask in a different fashion compared to 3628 // other MIMG instructions. The only useful DMASK values are 3629 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3630 // (red,red,red,red) etc.) The ISA document doesn't mention 3631 // this. 
3632 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3633 } 3634 3635 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3636 const unsigned Opc = Inst.getOpcode(); 3637 const MCInstrDesc &Desc = MII.get(Opc); 3638 3639 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3640 return true; 3641 3642 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3643 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3644 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3645 3646 if (!BaseOpcode->MSAA) 3647 return true; 3648 3649 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3650 assert(DimIdx != -1); 3651 3652 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3653 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3654 3655 return DimInfo->MSAA; 3656 } 3657 3658 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3659 { 3660 switch (Opcode) { 3661 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3662 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3663 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3664 return true; 3665 default: 3666 return false; 3667 } 3668 } 3669 3670 // movrels* opcodes should only allow VGPRS as src0. 3671 // This is specified in .td description for vop1/vop3, 3672 // but sdwa is handled differently. See isSDWAOperand. 3673 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3674 const OperandVector &Operands) { 3675 3676 const unsigned Opc = Inst.getOpcode(); 3677 const MCInstrDesc &Desc = MII.get(Opc); 3678 3679 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3680 return true; 3681 3682 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3683 assert(Src0Idx != -1); 3684 3685 SMLoc ErrLoc; 3686 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3687 if (Src0.isReg()) { 3688 auto Reg = mc2PseudoReg(Src0.getReg()); 3689 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3690 if (!isSGPR(Reg, TRI)) 3691 return true; 3692 ErrLoc = getRegLoc(Reg, Operands); 3693 } else { 3694 ErrLoc = getConstLoc(Operands); 3695 } 3696 3697 Error(ErrLoc, "source operand must be a VGPR"); 3698 return false; 3699 } 3700 3701 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3702 const OperandVector &Operands) { 3703 3704 const unsigned Opc = Inst.getOpcode(); 3705 3706 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3707 return true; 3708 3709 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3710 assert(Src0Idx != -1); 3711 3712 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3713 if (!Src0.isReg()) 3714 return true; 3715 3716 auto Reg = mc2PseudoReg(Src0.getReg()); 3717 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3718 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3719 Error(getRegLoc(Reg, Operands), 3720 "source operand must be either a VGPR or an inline constant"); 3721 return false; 3722 } 3723 3724 return true; 3725 } 3726 3727 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3728 const OperandVector &Operands) { 3729 const unsigned Opc = Inst.getOpcode(); 3730 const MCInstrDesc &Desc = MII.get(Opc); 3731 3732 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3733 return true; 3734 3735 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3736 if (Src2Idx == -1) 3737 return true; 3738 3739 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3740 if (!Src2.isReg()) 3741 return true; 3742 3743 MCRegister Src2Reg = Src2.getReg(); 3744 MCRegister DstReg = Inst.getOperand(0).getReg(); 3745 if (Src2Reg == DstReg) 3746 return 
true; 3747 3748 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3749 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3750 return true; 3751 3752 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3753 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3754 "source 2 operand must not partially overlap with dst"); 3755 return false; 3756 } 3757 3758 return true; 3759 } 3760 3761 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3762 switch (Inst.getOpcode()) { 3763 default: 3764 return true; 3765 case V_DIV_SCALE_F32_gfx6_gfx7: 3766 case V_DIV_SCALE_F32_vi: 3767 case V_DIV_SCALE_F32_gfx10: 3768 case V_DIV_SCALE_F64_gfx6_gfx7: 3769 case V_DIV_SCALE_F64_vi: 3770 case V_DIV_SCALE_F64_gfx10: 3771 break; 3772 } 3773 3774 // TODO: Check that src0 = src1 or src2. 3775 3776 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3777 AMDGPU::OpName::src2_modifiers, 3778 AMDGPU::OpName::src2_modifiers}) { 3779 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3780 .getImm() & 3781 SISrcMods::ABS) { 3782 return false; 3783 } 3784 } 3785 3786 return true; 3787 } 3788 3789 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3790 3791 const unsigned Opc = Inst.getOpcode(); 3792 const MCInstrDesc &Desc = MII.get(Opc); 3793 3794 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3795 return true; 3796 3797 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3798 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3799 if (isCI() || isSI()) 3800 return false; 3801 } 3802 3803 return true; 3804 } 3805 3806 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3807 const unsigned Opc = Inst.getOpcode(); 3808 const MCInstrDesc &Desc = MII.get(Opc); 3809 3810 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3811 return true; 3812 3813 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3814 if (DimIdx < 0) 3815 return true; 3816 3817 long Imm = Inst.getOperand(DimIdx).getImm(); 3818 if (Imm < 0 || Imm >= 8) 3819 return false; 3820 3821 return true; 3822 } 3823 3824 static bool IsRevOpcode(const unsigned Opcode) 3825 { 3826 switch (Opcode) { 3827 case AMDGPU::V_SUBREV_F32_e32: 3828 case AMDGPU::V_SUBREV_F32_e64: 3829 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3830 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3831 case AMDGPU::V_SUBREV_F32_e32_vi: 3832 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3833 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3834 case AMDGPU::V_SUBREV_F32_e64_vi: 3835 3836 case AMDGPU::V_SUBREV_CO_U32_e32: 3837 case AMDGPU::V_SUBREV_CO_U32_e64: 3838 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3839 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3840 3841 case AMDGPU::V_SUBBREV_U32_e32: 3842 case AMDGPU::V_SUBBREV_U32_e64: 3843 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3844 case AMDGPU::V_SUBBREV_U32_e32_vi: 3845 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3846 case AMDGPU::V_SUBBREV_U32_e64_vi: 3847 3848 case AMDGPU::V_SUBREV_U32_e32: 3849 case AMDGPU::V_SUBREV_U32_e64: 3850 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3851 case AMDGPU::V_SUBREV_U32_e32_vi: 3852 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3853 case AMDGPU::V_SUBREV_U32_e64_vi: 3854 3855 case AMDGPU::V_SUBREV_F16_e32: 3856 case AMDGPU::V_SUBREV_F16_e64: 3857 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3858 case AMDGPU::V_SUBREV_F16_e32_vi: 3859 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3860 case AMDGPU::V_SUBREV_F16_e64_vi: 3861 3862 case AMDGPU::V_SUBREV_U16_e32: 3863 case AMDGPU::V_SUBREV_U16_e64: 3864 case AMDGPU::V_SUBREV_U16_e32_vi: 3865 case AMDGPU::V_SUBREV_U16_e64_vi: 3866 3867 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3868 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3869 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3870 3871 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3872 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3873 3874 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3875 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3876 3877 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3878 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3879 3880 case AMDGPU::V_LSHRREV_B32_e32: 3881 case AMDGPU::V_LSHRREV_B32_e64: 3882 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3883 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3884 case AMDGPU::V_LSHRREV_B32_e32_vi: 3885 case AMDGPU::V_LSHRREV_B32_e64_vi: 3886 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3887 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3888 3889 case AMDGPU::V_ASHRREV_I32_e32: 3890 case AMDGPU::V_ASHRREV_I32_e64: 3891 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3892 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3893 case AMDGPU::V_ASHRREV_I32_e32_vi: 3894 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3895 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3896 case AMDGPU::V_ASHRREV_I32_e64_vi: 3897 3898 case AMDGPU::V_LSHLREV_B32_e32: 3899 case AMDGPU::V_LSHLREV_B32_e64: 3900 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3901 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3902 case AMDGPU::V_LSHLREV_B32_e32_vi: 3903 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3904 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3905 case AMDGPU::V_LSHLREV_B32_e64_vi: 3906 3907 case AMDGPU::V_LSHLREV_B16_e32: 3908 case AMDGPU::V_LSHLREV_B16_e64: 3909 case AMDGPU::V_LSHLREV_B16_e32_vi: 3910 case AMDGPU::V_LSHLREV_B16_e64_vi: 3911 case AMDGPU::V_LSHLREV_B16_gfx10: 3912 3913 case AMDGPU::V_LSHRREV_B16_e32: 3914 case AMDGPU::V_LSHRREV_B16_e64: 3915 case AMDGPU::V_LSHRREV_B16_e32_vi: 3916 case AMDGPU::V_LSHRREV_B16_e64_vi: 3917 case AMDGPU::V_LSHRREV_B16_gfx10: 3918 3919 case AMDGPU::V_ASHRREV_I16_e32: 3920 case AMDGPU::V_ASHRREV_I16_e64: 3921 case AMDGPU::V_ASHRREV_I16_e32_vi: 3922 case AMDGPU::V_ASHRREV_I16_e64_vi: 3923 case AMDGPU::V_ASHRREV_I16_gfx10: 3924 3925 case AMDGPU::V_LSHLREV_B64_e64: 3926 case AMDGPU::V_LSHLREV_B64_gfx10: 3927 case AMDGPU::V_LSHLREV_B64_vi: 3928 3929 case AMDGPU::V_LSHRREV_B64_e64: 3930 case AMDGPU::V_LSHRREV_B64_gfx10: 3931 case AMDGPU::V_LSHRREV_B64_vi: 3932 3933 case AMDGPU::V_ASHRREV_I64_e64: 3934 case AMDGPU::V_ASHRREV_I64_gfx10: 3935 case AMDGPU::V_ASHRREV_I64_vi: 3936 3937 case AMDGPU::V_PK_LSHLREV_B16: 3938 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3939 case AMDGPU::V_PK_LSHLREV_B16_vi: 3940 3941 case AMDGPU::V_PK_LSHRREV_B16: 3942 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3943 case AMDGPU::V_PK_LSHRREV_B16_vi: 3944 case AMDGPU::V_PK_ASHRREV_I16: 3945 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3946 case AMDGPU::V_PK_ASHRREV_I16_vi: 3947 return true; 3948 default: 3949 return false; 3950 } 3951 } 3952 3953 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3954 3955 using namespace SIInstrFlags; 3956 const unsigned Opcode = Inst.getOpcode(); 3957 const MCInstrDesc &Desc = MII.get(Opcode); 3958 3959 // lds_direct register is defined so that it can be used 3960 // with 9-bit operands only. Ignore encodings which do not accept these. 
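// Illustrative examples (assuming a subtarget where lds_direct is accepted):
//   v_mov_b32 v0, lds_direct         // accepted: lds_direct is src0 of a VOP1
//   v_add_f32 v0, v1, lds_direct     // rejected: lds_direct may be used as src0 only
//   v_subrev_f32 v0, lds_direct, v1  // rejected: rev opcodes cannot use lds_direct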
3961 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3962 if ((Desc.TSFlags & Enc) == 0) 3963 return None; 3964 3965 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3966 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3967 if (SrcIdx == -1) 3968 break; 3969 const auto &Src = Inst.getOperand(SrcIdx); 3970 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3971 3972 if (isGFX90A()) 3973 return StringRef("lds_direct is not supported on this GPU"); 3974 3975 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3976 return StringRef("lds_direct cannot be used with this instruction"); 3977 3978 if (SrcName != OpName::src0) 3979 return StringRef("lds_direct may be used as src0 only"); 3980 } 3981 } 3982 3983 return None; 3984 } 3985 3986 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3987 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3988 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3989 if (Op.isFlatOffset()) 3990 return Op.getStartLoc(); 3991 } 3992 return getLoc(); 3993 } 3994 3995 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3996 const OperandVector &Operands) { 3997 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3998 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3999 return true; 4000 4001 auto Opcode = Inst.getOpcode(); 4002 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4003 assert(OpNum != -1); 4004 4005 const auto &Op = Inst.getOperand(OpNum); 4006 if (!hasFlatOffsets() && Op.getImm() != 0) { 4007 Error(getFlatOffsetLoc(Operands), 4008 "flat offset modifier is not supported on this GPU"); 4009 return false; 4010 } 4011 4012 // For FLAT segment the offset must be positive; 4013 // MSB is ignored and forced to zero. 4014 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4015 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4016 if (!isIntN(OffsetSize, Op.getImm())) { 4017 Error(getFlatOffsetLoc(Operands), 4018 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4019 return false; 4020 } 4021 } else { 4022 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4023 if (!isUIntN(OffsetSize, Op.getImm())) { 4024 Error(getFlatOffsetLoc(Operands), 4025 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4026 return false; 4027 } 4028 } 4029 4030 return true; 4031 } 4032 4033 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4034 // Start with second operand because SMEM Offset cannot be dst or src0. 
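  // (Operands[0] is the mnemonic token and Operands[1] is typically the
  // destination, so the search begins at index 2.)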
4035 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4036 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4037 if (Op.isSMEMOffset()) 4038 return Op.getStartLoc(); 4039 } 4040 return getLoc(); 4041 } 4042 4043 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4044 const OperandVector &Operands) { 4045 if (isCI() || isSI()) 4046 return true; 4047 4048 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4049 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4050 return true; 4051 4052 auto Opcode = Inst.getOpcode(); 4053 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4054 if (OpNum == -1) 4055 return true; 4056 4057 const auto &Op = Inst.getOperand(OpNum); 4058 if (!Op.isImm()) 4059 return true; 4060 4061 uint64_t Offset = Op.getImm(); 4062 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4063 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4064 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4065 return true; 4066 4067 Error(getSMEMOffsetLoc(Operands), 4068 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4069 "expected a 21-bit signed offset"); 4070 4071 return false; 4072 } 4073 4074 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4075 unsigned Opcode = Inst.getOpcode(); 4076 const MCInstrDesc &Desc = MII.get(Opcode); 4077 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4078 return true; 4079 4080 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4081 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4082 4083 const int OpIndices[] = { Src0Idx, Src1Idx }; 4084 4085 unsigned NumExprs = 0; 4086 unsigned NumLiterals = 0; 4087 uint32_t LiteralValue; 4088 4089 for (int OpIdx : OpIndices) { 4090 if (OpIdx == -1) break; 4091 4092 const MCOperand &MO = Inst.getOperand(OpIdx); 4093 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4094 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4095 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4096 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4097 if (NumLiterals == 0 || LiteralValue != Value) { 4098 LiteralValue = Value; 4099 ++NumLiterals; 4100 } 4101 } else if (MO.isExpr()) { 4102 ++NumExprs; 4103 } 4104 } 4105 } 4106 4107 return NumLiterals + NumExprs <= 1; 4108 } 4109 4110 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4111 const unsigned Opc = Inst.getOpcode(); 4112 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4113 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4114 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4115 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4116 4117 if (OpSel & ~3) 4118 return false; 4119 } 4120 4121 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4122 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4123 if (OpSelIdx != -1) { 4124 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4125 return false; 4126 } 4127 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4128 if (OpSelHiIdx != -1) { 4129 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4130 return false; 4131 } 4132 } 4133 4134 return true; 4135 } 4136 4137 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4138 const OperandVector &Operands) { 4139 const unsigned Opc = Inst.getOpcode(); 4140 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4141 if (DppCtrlIdx < 0) 4142 return true; 4143 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4144 4145 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4146 // DPP64 is supported for row_newbcast only. 4147 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4148 if (Src0Idx >= 0 && 4149 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4150 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4151 Error(S, "64 bit dpp only supports row_newbcast"); 4152 return false; 4153 } 4154 } 4155 4156 return true; 4157 } 4158 4159 // Check if VCC register matches wavefront size 4160 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4161 auto FB = getFeatureBits(); 4162 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4163 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4164 } 4165 4166 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4167 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4168 const OperandVector &Operands) { 4169 unsigned Opcode = Inst.getOpcode(); 4170 const MCInstrDesc &Desc = MII.get(Opcode); 4171 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4172 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4173 ImmIdx == -1) 4174 return true; 4175 4176 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4177 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4178 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4179 4180 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4181 4182 unsigned NumExprs = 0; 4183 unsigned NumLiterals = 0; 4184 uint32_t LiteralValue; 4185 4186 for (int OpIdx : OpIndices) { 4187 if (OpIdx == -1) 4188 continue; 4189 4190 const MCOperand &MO = Inst.getOperand(OpIdx); 4191 if (!MO.isImm() && !MO.isExpr()) 4192 continue; 4193 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4194 continue; 4195 4196 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4197 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4198 Error(getConstLoc(Operands), 4199 "inline constants are not allowed for this operand"); 4200 return false; 4201 } 4202 4203 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4204 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4205 if (NumLiterals == 0 || LiteralValue != Value) { 4206 LiteralValue = Value; 4207 ++NumLiterals; 4208 } 4209 } else if (MO.isExpr()) { 4210 ++NumExprs; 4211 } 4212 } 4213 NumLiterals += NumExprs; 4214 4215 if (!NumLiterals) 4216 return true; 4217 4218 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4219 Error(getLitLoc(Operands), "literal operands are not supported"); 4220 return false; 4221 } 4222 4223 if (NumLiterals > 1) { 4224 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4225 return false; 4226 } 4227 4228 return true; 4229 } 4230 4231 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4232 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4233 const MCRegisterInfo *MRI) { 4234 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4235 if (OpIdx < 0) 4236 return -1; 4237 4238 const MCOperand &Op = Inst.getOperand(OpIdx); 4239 if (!Op.isReg()) 4240 return -1; 4241 4242 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4243 auto Reg = Sub ? Sub : Op.getReg(); 4244 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4245 return AGPR32.contains(Reg) ? 
1 : 0; 4246 } 4247 4248 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4249 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4250 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4251 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4252 SIInstrFlags::DS)) == 0) 4253 return true; 4254 4255 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4256 : AMDGPU::OpName::vdata; 4257 4258 const MCRegisterInfo *MRI = getMRI(); 4259 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4260 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4261 4262 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4263 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4264 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4265 return false; 4266 } 4267 4268 auto FB = getFeatureBits(); 4269 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4270 if (DataAreg < 0 || DstAreg < 0) 4271 return true; 4272 return DstAreg == DataAreg; 4273 } 4274 4275 return DstAreg < 1 && DataAreg < 1; 4276 } 4277 4278 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4279 auto FB = getFeatureBits(); 4280 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4281 return true; 4282 4283 const MCRegisterInfo *MRI = getMRI(); 4284 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4285 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4286 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4287 const MCOperand &Op = Inst.getOperand(I); 4288 if (!Op.isReg()) 4289 continue; 4290 4291 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4292 if (!Sub) 4293 continue; 4294 4295 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4296 return false; 4297 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4298 return false; 4299 } 4300 4301 return true; 4302 } 4303 4304 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4305 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4306 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4307 if (Op.isBLGP()) 4308 return Op.getStartLoc(); 4309 } 4310 return SMLoc(); 4311 } 4312 4313 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4314 const OperandVector &Operands) { 4315 unsigned Opc = Inst.getOpcode(); 4316 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4317 if (BlgpIdx == -1) 4318 return true; 4319 SMLoc BLGPLoc = getBLGPLoc(Operands); 4320 if (!BLGPLoc.isValid()) 4321 return true; 4322 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4323 auto FB = getFeatureBits(); 4324 bool UsesNeg = false; 4325 if (FB[AMDGPU::FeatureGFX940Insts]) { 4326 switch (Opc) { 4327 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4328 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4329 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4330 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4331 UsesNeg = true; 4332 } 4333 } 4334 4335 if (IsNeg == UsesNeg) 4336 return true; 4337 4338 Error(BLGPLoc, 4339 UsesNeg ? "invalid modifier: blgp is not supported" 4340 : "invalid modifier: neg is not supported"); 4341 4342 return false; 4343 } 4344 4345 // gfx90a has an undocumented limitation: 4346 // DS_GWS opcodes must use even aligned registers. 
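// Illustrative example (gfx90a, operand syntax abbreviated):
// "ds_gws_init v2 gds" uses an even-aligned data register and is accepted,
// while "ds_gws_init v3 gds" is rejected with "vgpr must be even aligned".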
4347 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4348 const OperandVector &Operands) { 4349 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4350 return true; 4351 4352 int Opc = Inst.getOpcode(); 4353 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4354 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4355 return true; 4356 4357 const MCRegisterInfo *MRI = getMRI(); 4358 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4359 int Data0Pos = 4360 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4361 assert(Data0Pos != -1); 4362 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4363 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4364 if (RegIdx & 1) { 4365 SMLoc RegLoc = getRegLoc(Reg, Operands); 4366 Error(RegLoc, "vgpr must be even aligned"); 4367 return false; 4368 } 4369 4370 return true; 4371 } 4372 4373 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4374 const OperandVector &Operands, 4375 const SMLoc &IDLoc) { 4376 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4377 AMDGPU::OpName::cpol); 4378 if (CPolPos == -1) 4379 return true; 4380 4381 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4382 4383 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4384 if ((TSFlags & (SIInstrFlags::SMRD)) && 4385 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4386 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4387 return false; 4388 } 4389 4390 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4391 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4392 StringRef CStr(S.getPointer()); 4393 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4394 Error(S, "scc is not supported on this GPU"); 4395 return false; 4396 } 4397 4398 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4399 return true; 4400 4401 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4402 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4403 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4404 : "instruction must use glc"); 4405 return false; 4406 } 4407 } else { 4408 if (CPol & CPol::GLC) { 4409 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4410 StringRef CStr(S.getPointer()); 4411 S = SMLoc::getFromPointer( 4412 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4413 Error(S, isGFX940() ? "instruction must not use sc0" 4414 : "instruction must not use glc"); 4415 return false; 4416 } 4417 } 4418 4419 return true; 4420 } 4421 4422 bool AMDGPUAsmParser::validateFlatLdsDMA(const MCInst &Inst, 4423 const OperandVector &Operands, 4424 const SMLoc &IDLoc) { 4425 if (isGFX940()) 4426 return true; 4427 4428 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4429 if ((TSFlags & (SIInstrFlags::VALU | SIInstrFlags::FLAT)) != 4430 (SIInstrFlags::VALU | SIInstrFlags::FLAT)) 4431 return true; 4432 // This is FLAT LDS DMA. 4433 4434 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyLDS, Operands); 4435 StringRef CStr(S.getPointer()); 4436 if (!CStr.startswith("lds")) { 4437 // This is incorrectly selected LDS DMA version of a FLAT load opcode. 4438 // And LDS version should have 'lds' modifier, but it follows optional 4439 // operands so its absense is ignored by the matcher. 
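    // Illustrative example (operand details omitted): a FLAT/global load
    // written with a trailing "lds" modifier names the LDS DMA form
    // explicitly; the same statement without "lds" must not be matched to
    // the DMA opcode, so it is rejected here with a generic operand error.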
4440 Error(IDLoc, "invalid operands for instruction"); 4441 return false; 4442 } 4443 4444 return true; 4445 } 4446 4447 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4448 const SMLoc &IDLoc, 4449 const OperandVector &Operands) { 4450 if (auto ErrMsg = validateLdsDirect(Inst)) { 4451 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4452 return false; 4453 } 4454 if (!validateSOPLiteral(Inst)) { 4455 Error(getLitLoc(Operands), 4456 "only one literal operand is allowed"); 4457 return false; 4458 } 4459 if (!validateVOPLiteral(Inst, Operands)) { 4460 return false; 4461 } 4462 if (!validateConstantBusLimitations(Inst, Operands)) { 4463 return false; 4464 } 4465 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4466 return false; 4467 } 4468 if (!validateIntClampSupported(Inst)) { 4469 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4470 "integer clamping is not supported on this GPU"); 4471 return false; 4472 } 4473 if (!validateOpSel(Inst)) { 4474 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4475 "invalid op_sel operand"); 4476 return false; 4477 } 4478 if (!validateDPP(Inst, Operands)) { 4479 return false; 4480 } 4481 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4482 if (!validateMIMGD16(Inst)) { 4483 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4484 "d16 modifier is not supported on this GPU"); 4485 return false; 4486 } 4487 if (!validateMIMGDim(Inst)) { 4488 Error(IDLoc, "dim modifier is required on this GPU"); 4489 return false; 4490 } 4491 if (!validateMIMGMSAA(Inst)) { 4492 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4493 "invalid dim; must be MSAA type"); 4494 return false; 4495 } 4496 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4497 Error(IDLoc, *ErrMsg); 4498 return false; 4499 } 4500 if (!validateMIMGAddrSize(Inst)) { 4501 Error(IDLoc, 4502 "image address size does not match dim and a16"); 4503 return false; 4504 } 4505 if (!validateMIMGAtomicDMask(Inst)) { 4506 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4507 "invalid atomic image dmask"); 4508 return false; 4509 } 4510 if (!validateMIMGGatherDMask(Inst)) { 4511 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4512 "invalid image_gather dmask: only one bit must be set"); 4513 return false; 4514 } 4515 if (!validateMovrels(Inst, Operands)) { 4516 return false; 4517 } 4518 if (!validateFlatOffset(Inst, Operands)) { 4519 return false; 4520 } 4521 if (!validateSMEMOffset(Inst, Operands)) { 4522 return false; 4523 } 4524 if (!validateMAIAccWrite(Inst, Operands)) { 4525 return false; 4526 } 4527 if (!validateMFMA(Inst, Operands)) { 4528 return false; 4529 } 4530 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4531 return false; 4532 } 4533 4534 if (!validateAGPRLdSt(Inst)) { 4535 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4536 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4537 : "invalid register class: agpr loads and stores not supported on this GPU" 4538 ); 4539 return false; 4540 } 4541 if (!validateVGPRAlign(Inst)) { 4542 Error(IDLoc, 4543 "invalid register class: vgpr tuples must be 64 bit aligned"); 4544 return false; 4545 } 4546 if (!validateGWS(Inst, Operands)) { 4547 return false; 4548 } 4549 4550 if (!validateBLGP(Inst, Operands)) { 4551 return false; 4552 } 4553 4554 if (!validateDivScale(Inst)) { 4555 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4556 return false; 4557 } 4558 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4559 return false; 4560 } 4561 4562 if (!validateFlatLdsDMA(Inst, Operands, IDLoc)) { 4563 return false; 4564 } 4565 4566 return true; 4567 } 4568 4569 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4570 const FeatureBitset &FBS, 4571 unsigned VariantID = 0); 4572 4573 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4574 const FeatureBitset &AvailableFeatures, 4575 unsigned VariantID); 4576 4577 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4578 const FeatureBitset &FBS) { 4579 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4580 } 4581 4582 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4583 const FeatureBitset &FBS, 4584 ArrayRef<unsigned> Variants) { 4585 for (auto Variant : Variants) { 4586 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4587 return true; 4588 } 4589 4590 return false; 4591 } 4592 4593 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4594 const SMLoc &IDLoc) { 4595 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4596 4597 // Check if requested instruction variant is supported. 4598 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4599 return false; 4600 4601 // This instruction is not supported. 4602 // Clear any other pending errors because they are no longer relevant. 4603 getParser().clearPendingErrors(); 4604 4605 // Requested instruction variant is not supported. 4606 // Check if any other variants are supported. 4607 StringRef VariantName = getMatchedVariantName(); 4608 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4609 return Error(IDLoc, 4610 Twine(VariantName, 4611 " variant of this instruction is not supported")); 4612 } 4613 4614 // Finally check if this instruction is supported on any other GPU. 4615 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4616 return Error(IDLoc, "instruction not supported on this GPU"); 4617 } 4618 4619 // Instruction not supported on any GPU. Probably a typo. 4620 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4621 return Error(IDLoc, "invalid instruction" + Suggestion); 4622 } 4623 4624 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4625 OperandVector &Operands, 4626 MCStreamer &Out, 4627 uint64_t &ErrorInfo, 4628 bool MatchingInlineAsm) { 4629 MCInst Inst; 4630 unsigned Result = Match_Success; 4631 for (auto Variant : getMatchedVariants()) { 4632 uint64_t EI; 4633 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4634 Variant); 4635 // We order match statuses from least to most specific. 
We use most specific 4636 // status as resulting 4637 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4638 if ((R == Match_Success) || 4639 (R == Match_PreferE32) || 4640 (R == Match_MissingFeature && Result != Match_PreferE32) || 4641 (R == Match_InvalidOperand && Result != Match_MissingFeature 4642 && Result != Match_PreferE32) || 4643 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4644 && Result != Match_MissingFeature 4645 && Result != Match_PreferE32)) { 4646 Result = R; 4647 ErrorInfo = EI; 4648 } 4649 if (R == Match_Success) 4650 break; 4651 } 4652 4653 if (Result == Match_Success) { 4654 if (!validateInstruction(Inst, IDLoc, Operands)) { 4655 return true; 4656 } 4657 Inst.setLoc(IDLoc); 4658 Out.emitInstruction(Inst, getSTI()); 4659 return false; 4660 } 4661 4662 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4663 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4664 return true; 4665 } 4666 4667 switch (Result) { 4668 default: break; 4669 case Match_MissingFeature: 4670 // It has been verified that the specified instruction 4671 // mnemonic is valid. A match was found but it requires 4672 // features which are not supported on this GPU. 4673 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4674 4675 case Match_InvalidOperand: { 4676 SMLoc ErrorLoc = IDLoc; 4677 if (ErrorInfo != ~0ULL) { 4678 if (ErrorInfo >= Operands.size()) { 4679 return Error(IDLoc, "too few operands for instruction"); 4680 } 4681 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4682 if (ErrorLoc == SMLoc()) 4683 ErrorLoc = IDLoc; 4684 } 4685 return Error(ErrorLoc, "invalid operand for instruction"); 4686 } 4687 4688 case Match_PreferE32: 4689 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4690 "should be encoded as e32"); 4691 case Match_MnemonicFail: 4692 llvm_unreachable("Invalid instructions should have been handled already"); 4693 } 4694 llvm_unreachable("Implement any new match types added!"); 4695 } 4696 4697 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4698 int64_t Tmp = -1; 4699 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4700 return true; 4701 } 4702 if (getParser().parseAbsoluteExpression(Tmp)) { 4703 return true; 4704 } 4705 Ret = static_cast<uint32_t>(Tmp); 4706 return false; 4707 } 4708 4709 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4710 uint32_t &Minor) { 4711 if (ParseAsAbsoluteExpression(Major)) 4712 return TokError("invalid major version"); 4713 4714 if (!trySkipToken(AsmToken::Comma)) 4715 return TokError("minor version number required, comma expected"); 4716 4717 if (ParseAsAbsoluteExpression(Minor)) 4718 return TokError("invalid minor version"); 4719 4720 return false; 4721 } 4722 4723 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4724 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4725 return TokError("directive only supported for amdgcn architecture"); 4726 4727 std::string TargetIDDirective; 4728 SMLoc TargetStart = getTok().getLoc(); 4729 if (getParser().parseEscapedString(TargetIDDirective)) 4730 return true; 4731 4732 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4733 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4734 return getParser().Error(TargetRange.Start, 4735 (Twine(".amdgcn_target directive's target id ") + 4736 Twine(TargetIDDirective) + 4737 Twine(" does not match the specified target id ") + 4738 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4739 4740 return false; 4741 } 4742 4743 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4744 return Error(Range.Start, "value out of range", Range); 4745 } 4746 4747 bool AMDGPUAsmParser::calculateGPRBlocks( 4748 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4749 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4750 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4751 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4752 // TODO(scott.linder): These calculations are duplicated from 4753 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4754 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4755 4756 unsigned NumVGPRs = NextFreeVGPR; 4757 unsigned NumSGPRs = NextFreeSGPR; 4758 4759 if (Version.Major >= 10) 4760 NumSGPRs = 0; 4761 else { 4762 unsigned MaxAddressableNumSGPRs = 4763 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4764 4765 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4766 NumSGPRs > MaxAddressableNumSGPRs) 4767 return OutOfRangeError(SGPRRange); 4768 4769 NumSGPRs += 4770 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4771 4772 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4773 NumSGPRs > MaxAddressableNumSGPRs) 4774 return OutOfRangeError(SGPRRange); 4775 4776 if (Features.test(FeatureSGPRInitBug)) 4777 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4778 } 4779 4780 VGPRBlocks = 4781 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4782 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4783 4784 return false; 4785 } 4786 4787 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4788 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4789 return TokError("directive only supported for amdgcn architecture"); 4790 4791 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4792 return TokError("directive only supported for amdhsa OS"); 4793 4794 StringRef KernelName; 4795 if (getParser().parseIdentifier(KernelName)) 4796 return true; 4797 4798 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4799 4800 StringSet<> Seen; 4801 4802 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4803 4804 SMRange VGPRRange; 4805 uint64_t NextFreeVGPR = 0; 4806 uint64_t AccumOffset = 0; 4807 uint64_t SharedVGPRCount = 0; 4808 SMRange SGPRRange; 4809 uint64_t NextFreeSGPR = 0; 4810 4811 // Count the number of user SGPRs implied from the enabled feature bits. 4812 unsigned ImpliedUserSGPRCount = 0; 4813 4814 // Track if the asm explicitly contains the directive for the user SGPR 4815 // count. 
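  // Illustrative example: ".amdhsa_user_sgpr_dispatch_ptr 1" together with
  // ".amdhsa_user_sgpr_kernarg_segment_ptr 1" implies 4 user SGPRs, so an
  // explicit ".amdhsa_user_sgpr_count 3" is rejected below, while a value of
  // 4 or more is accepted.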
4816 Optional<unsigned> ExplicitUserSGPRCount; 4817 bool ReserveVCC = true; 4818 bool ReserveFlatScr = true; 4819 Optional<bool> EnableWavefrontSize32; 4820 4821 while (true) { 4822 while (trySkipToken(AsmToken::EndOfStatement)); 4823 4824 StringRef ID; 4825 SMRange IDRange = getTok().getLocRange(); 4826 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4827 return true; 4828 4829 if (ID == ".end_amdhsa_kernel") 4830 break; 4831 4832 if (Seen.find(ID) != Seen.end()) 4833 return TokError(".amdhsa_ directives cannot be repeated"); 4834 Seen.insert(ID); 4835 4836 SMLoc ValStart = getLoc(); 4837 int64_t IVal; 4838 if (getParser().parseAbsoluteExpression(IVal)) 4839 return true; 4840 SMLoc ValEnd = getLoc(); 4841 SMRange ValRange = SMRange(ValStart, ValEnd); 4842 4843 if (IVal < 0) 4844 return OutOfRangeError(ValRange); 4845 4846 uint64_t Val = IVal; 4847 4848 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4849 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4850 return OutOfRangeError(RANGE); \ 4851 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4852 4853 if (ID == ".amdhsa_group_segment_fixed_size") { 4854 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4855 return OutOfRangeError(ValRange); 4856 KD.group_segment_fixed_size = Val; 4857 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4858 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4859 return OutOfRangeError(ValRange); 4860 KD.private_segment_fixed_size = Val; 4861 } else if (ID == ".amdhsa_kernarg_size") { 4862 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4863 return OutOfRangeError(ValRange); 4864 KD.kernarg_size = Val; 4865 } else if (ID == ".amdhsa_user_sgpr_count") { 4866 ExplicitUserSGPRCount = Val; 4867 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4868 if (hasArchitectedFlatScratch()) 4869 return Error(IDRange.Start, 4870 "directive is not supported with architected flat scratch", 4871 IDRange); 4872 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4873 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4874 Val, ValRange); 4875 if (Val) 4876 ImpliedUserSGPRCount += 4; 4877 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4878 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4879 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4880 ValRange); 4881 if (Val) 4882 ImpliedUserSGPRCount += 2; 4883 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4884 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4885 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4886 ValRange); 4887 if (Val) 4888 ImpliedUserSGPRCount += 2; 4889 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4890 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4891 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4892 Val, ValRange); 4893 if (Val) 4894 ImpliedUserSGPRCount += 2; 4895 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4896 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4897 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4898 ValRange); 4899 if (Val) 4900 ImpliedUserSGPRCount += 2; 4901 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4902 if (hasArchitectedFlatScratch()) 4903 return Error(IDRange.Start, 4904 "directive is not supported with architected flat scratch", 4905 IDRange); 4906 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4907 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4908 ValRange); 4909 if (Val) 4910 ImpliedUserSGPRCount += 2; 4911 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4912 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4913 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4914 Val, ValRange); 4915 if (Val) 4916 ImpliedUserSGPRCount += 1; 4917 } else if (ID == ".amdhsa_wavefront_size32") { 4918 if (IVersion.Major < 10) 4919 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4920 EnableWavefrontSize32 = Val; 4921 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4922 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4923 Val, ValRange); 4924 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4925 if (hasArchitectedFlatScratch()) 4926 return Error(IDRange.Start, 4927 "directive is not supported with architected flat scratch", 4928 IDRange); 4929 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4930 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4931 } else if (ID == ".amdhsa_enable_private_segment") { 4932 if (!hasArchitectedFlatScratch()) 4933 return Error( 4934 IDRange.Start, 4935 "directive is not supported without architected flat scratch", 4936 IDRange); 4937 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4938 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4939 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4940 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4941 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4942 ValRange); 4943 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4944 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4945 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4946 ValRange); 4947 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4949 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4950 ValRange); 4951 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4952 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4953 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4954 ValRange); 4955 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4957 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4958 ValRange); 4959 } else if (ID == ".amdhsa_next_free_vgpr") { 4960 VGPRRange = ValRange; 4961 NextFreeVGPR = Val; 4962 } else if (ID == ".amdhsa_next_free_sgpr") { 4963 SGPRRange = ValRange; 4964 NextFreeSGPR = Val; 4965 } else if (ID == ".amdhsa_accum_offset") { 4966 if (!isGFX90A()) 4967 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4968 AccumOffset = Val; 4969 } else if (ID == ".amdhsa_reserve_vcc") { 4970 if (!isUInt<1>(Val)) 4971 return OutOfRangeError(ValRange); 4972 ReserveVCC = Val; 4973 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4974 if (IVersion.Major < 7) 4975 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4976 if (hasArchitectedFlatScratch()) 4977 return Error(IDRange.Start, 4978 "directive is not supported with architected flat scratch", 4979 IDRange); 4980 if (!isUInt<1>(Val)) 4981 return OutOfRangeError(ValRange); 4982 ReserveFlatScr = Val; 4983 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4984 if (IVersion.Major < 8) 4985 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4986 if (!isUInt<1>(Val)) 4987 return OutOfRangeError(ValRange); 4988 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4989 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4990 IDRange); 4991 } else if (ID == ".amdhsa_float_round_mode_32") { 4992 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4993 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4994 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4995 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4996 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4997 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4998 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4999 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 5000 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 5001 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5002 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 5003 ValRange); 5004 } else if (ID == ".amdhsa_dx10_clamp") { 5005 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 5006 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 5007 } else if (ID == ".amdhsa_ieee_mode") { 5008 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 5009 Val, ValRange); 5010 } else if (ID == ".amdhsa_fp16_overflow") { 5011 if (IVersion.Major < 9) 5012 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 5013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 5014 ValRange); 5015 } else if (ID == ".amdhsa_tg_split") { 5016 if (!isGFX90A()) 5017 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 5018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 5019 ValRange); 5020 } else if (ID == ".amdhsa_workgroup_processor_mode") { 5021 if (IVersion.Major < 10) 5022 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5023 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 5024 ValRange); 5025 } else if (ID == ".amdhsa_memory_ordered") { 5026 if (IVersion.Major < 10) 5027 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5028 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 5029 ValRange); 5030 } else if (ID == ".amdhsa_forward_progress") { 5031 if (IVersion.Major < 10) 5032 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5033 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 5034 ValRange); 5035 } else if (ID == ".amdhsa_shared_vgpr_count") { 5036 if (IVersion.Major < 10) 5037 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 5038 SharedVGPRCount = Val; 5039 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 5040 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 5041 ValRange); 5042 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5043 PARSE_BITS_ENTRY( 5044 KD.compute_pgm_rsrc2, 5045 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5046 ValRange); 5047 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5048 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5049 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5050 Val, ValRange); 5051 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5052 PARSE_BITS_ENTRY( 5053 KD.compute_pgm_rsrc2, 5054 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5055 ValRange); 5056 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5057 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5058 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5059 Val, ValRange); 5060 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5061 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5062 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5063 Val, ValRange); 5064 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5065 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5066 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5067 Val, ValRange); 5068 } else if (ID == ".amdhsa_exception_int_div_zero") { 5069 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5070 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5071 Val, ValRange); 5072 } else { 5073 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5074 } 5075 5076 #undef PARSE_BITS_ENTRY 5077 } 5078 5079 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5080 return TokError(".amdhsa_next_free_vgpr directive is required"); 5081 5082 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5083 return TokError(".amdhsa_next_free_sgpr directive is required"); 5084 5085 unsigned VGPRBlocks; 5086 unsigned SGPRBlocks; 5087 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5088 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5089 EnableWavefrontSize32, NextFreeVGPR, 5090 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5091 SGPRBlocks)) 5092 return true; 5093 5094 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5095 VGPRBlocks)) 5096 return OutOfRangeError(VGPRRange); 5097 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5098 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5099 5100 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5101 SGPRBlocks)) 5102 return OutOfRangeError(SGPRRange); 5103 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5104 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5105 SGPRBlocks); 5106 5107 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5108 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 5109 "enabled user SGPRs"); 5110 5111 unsigned UserSGPRCount = 5112 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5113 5114 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5115 return TokError("too many user SGPRs enabled"); 5116 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5117 UserSGPRCount); 5118 5119 if (isGFX90A()) { 5120 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5121 return TokError(".amdhsa_accum_offset directive is required"); 5122 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5123 return TokError("accum_offset should be in range [4..256] in " 5124 "increments of 4"); 5125 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5126 return TokError("accum_offset exceeds total VGPR allocation"); 5127 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5128 (AccumOffset / 4 - 1)); 5129 } 5130 5131 if (IVersion.Major == 10) { 5132 // SharedVGPRCount < 16 checked by PARSE_ENTRY_BITS 5133 if (SharedVGPRCount && EnableWavefrontSize32) { 5134 return TokError("shared_vgpr_count directive not valid on " 5135 "wavefront size 32"); 5136 } 5137 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5138 return TokError("shared_vgpr_count*2 + " 5139 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5140 "exceed 63\n"); 5141 } 5142 } 5143 5144 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5145 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5146 ReserveFlatScr); 5147 return false; 5148 } 5149 5150 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5151 uint32_t Major; 5152 uint32_t Minor; 5153 5154 if (ParseDirectiveMajorMinor(Major, Minor)) 5155 return true; 5156 5157 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5158 return false; 5159 } 5160 5161 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5162 uint32_t Major; 5163 uint32_t Minor; 5164 uint32_t Stepping; 5165 StringRef VendorName; 5166 StringRef ArchName; 5167 5168 // If this directive has no 
arguments, then use the ISA version for the 5169 // targeted GPU. 5170 if (isToken(AsmToken::EndOfStatement)) { 5171 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5172 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5173 ISA.Stepping, 5174 "AMD", "AMDGPU"); 5175 return false; 5176 } 5177 5178 if (ParseDirectiveMajorMinor(Major, Minor)) 5179 return true; 5180 5181 if (!trySkipToken(AsmToken::Comma)) 5182 return TokError("stepping version number required, comma expected"); 5183 5184 if (ParseAsAbsoluteExpression(Stepping)) 5185 return TokError("invalid stepping version"); 5186 5187 if (!trySkipToken(AsmToken::Comma)) 5188 return TokError("vendor name required, comma expected"); 5189 5190 if (!parseString(VendorName, "invalid vendor name")) 5191 return true; 5192 5193 if (!trySkipToken(AsmToken::Comma)) 5194 return TokError("arch name required, comma expected"); 5195 5196 if (!parseString(ArchName, "invalid arch name")) 5197 return true; 5198 5199 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5200 VendorName, ArchName); 5201 return false; 5202 } 5203 5204 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5205 amd_kernel_code_t &Header) { 5206 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5207 // assembly for backwards compatibility. 5208 if (ID == "max_scratch_backing_memory_byte_size") { 5209 Parser.eatToEndOfStatement(); 5210 return false; 5211 } 5212 5213 SmallString<40> ErrStr; 5214 raw_svector_ostream Err(ErrStr); 5215 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5216 return TokError(Err.str()); 5217 } 5218 Lex(); 5219 5220 if (ID == "enable_wavefront_size32") { 5221 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5222 if (!isGFX10Plus()) 5223 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5224 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5225 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5226 } else { 5227 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5228 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5229 } 5230 } 5231 5232 if (ID == "wavefront_size") { 5233 if (Header.wavefront_size == 5) { 5234 if (!isGFX10Plus()) 5235 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5236 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5237 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5238 } else if (Header.wavefront_size == 6) { 5239 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5240 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5241 } 5242 } 5243 5244 if (ID == "enable_wgp_mode") { 5245 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5246 !isGFX10Plus()) 5247 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5248 } 5249 5250 if (ID == "enable_mem_ordered") { 5251 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5252 !isGFX10Plus()) 5253 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5254 } 5255 5256 if (ID == "enable_fwd_progress") { 5257 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5258 !isGFX10Plus()) 5259 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5260 } 5261 5262 return false; 5263 } 5264 5265 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5266 amd_kernel_code_t Header; 5267 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5268 5269 while (true) { 
5270 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5271 // will set the current token to EndOfStatement. 5272 while(trySkipToken(AsmToken::EndOfStatement)); 5273 5274 StringRef ID; 5275 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5276 return true; 5277 5278 if (ID == ".end_amd_kernel_code_t") 5279 break; 5280 5281 if (ParseAMDKernelCodeTValue(ID, Header)) 5282 return true; 5283 } 5284 5285 getTargetStreamer().EmitAMDKernelCodeT(Header); 5286 5287 return false; 5288 } 5289 5290 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5291 StringRef KernelName; 5292 if (!parseId(KernelName, "expected symbol name")) 5293 return true; 5294 5295 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5296 ELF::STT_AMDGPU_HSA_KERNEL); 5297 5298 KernelScope.initialize(getContext()); 5299 return false; 5300 } 5301 5302 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5303 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5304 return Error(getLoc(), 5305 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5306 "architectures"); 5307 } 5308 5309 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5310 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5311 return Error(getParser().getTok().getLoc(), "target id must match options"); 5312 5313 getTargetStreamer().EmitISAVersion(); 5314 Lex(); 5315 5316 return false; 5317 } 5318 5319 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5320 const char *AssemblerDirectiveBegin; 5321 const char *AssemblerDirectiveEnd; 5322 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5323 isHsaAbiVersion3AndAbove(&getSTI()) 5324 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5325 HSAMD::V3::AssemblerDirectiveEnd) 5326 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5327 HSAMD::AssemblerDirectiveEnd); 5328 5329 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5330 return Error(getLoc(), 5331 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5332 "not available on non-amdhsa OSes")).str()); 5333 } 5334 5335 std::string HSAMetadataString; 5336 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5337 HSAMetadataString)) 5338 return true; 5339 5340 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5341 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5342 return Error(getLoc(), "invalid HSA metadata"); 5343 } else { 5344 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5345 return Error(getLoc(), "invalid HSA metadata"); 5346 } 5347 5348 return false; 5349 } 5350 5351 /// Common code to parse out a block of text (typically YAML) between start and 5352 /// end directives. 
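/// For example (illustrative), HSA metadata is collected verbatim between the
/// .amdgpu_metadata and .end_amdgpu_metadata directives before being handed to
/// the target streamer.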
5353 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5354 const char *AssemblerDirectiveEnd, 5355 std::string &CollectString) { 5356 5357 raw_string_ostream CollectStream(CollectString); 5358 5359 getLexer().setSkipSpace(false); 5360 5361 bool FoundEnd = false; 5362 while (!isToken(AsmToken::Eof)) { 5363 while (isToken(AsmToken::Space)) { 5364 CollectStream << getTokenStr(); 5365 Lex(); 5366 } 5367 5368 if (trySkipId(AssemblerDirectiveEnd)) { 5369 FoundEnd = true; 5370 break; 5371 } 5372 5373 CollectStream << Parser.parseStringToEndOfStatement() 5374 << getContext().getAsmInfo()->getSeparatorString(); 5375 5376 Parser.eatToEndOfStatement(); 5377 } 5378 5379 getLexer().setSkipSpace(true); 5380 5381 if (isToken(AsmToken::Eof) && !FoundEnd) { 5382 return TokError(Twine("expected directive ") + 5383 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5384 } 5385 5386 CollectStream.flush(); 5387 return false; 5388 } 5389 5390 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5391 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5392 std::string String; 5393 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5394 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5395 return true; 5396 5397 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5398 if (!PALMetadata->setFromString(String)) 5399 return Error(getLoc(), "invalid PAL metadata"); 5400 return false; 5401 } 5402 5403 /// Parse the assembler directive for old linear-format PAL metadata. 5404 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5405 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5406 return Error(getLoc(), 5407 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5408 "not available on non-amdpal OSes")).str()); 5409 } 5410 5411 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5412 PALMetadata->setLegacy(); 5413 for (;;) { 5414 uint32_t Key, Value; 5415 if (ParseAsAbsoluteExpression(Key)) { 5416 return TokError(Twine("invalid value in ") + 5417 Twine(PALMD::AssemblerDirective)); 5418 } 5419 if (!trySkipToken(AsmToken::Comma)) { 5420 return TokError(Twine("expected an even number of values in ") + 5421 Twine(PALMD::AssemblerDirective)); 5422 } 5423 if (ParseAsAbsoluteExpression(Value)) { 5424 return TokError(Twine("invalid value in ") + 5425 Twine(PALMD::AssemblerDirective)); 5426 } 5427 PALMetadata->setRegister(Key, Value); 5428 if (!trySkipToken(AsmToken::Comma)) 5429 break; 5430 } 5431 return false; 5432 } 5433 5434 /// ParseDirectiveAMDGPULDS 5435 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5436 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5437 if (getParser().checkForValidSection()) 5438 return true; 5439 5440 StringRef Name; 5441 SMLoc NameLoc = getLoc(); 5442 if (getParser().parseIdentifier(Name)) 5443 return TokError("expected identifier in directive"); 5444 5445 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5446 if (parseToken(AsmToken::Comma, "expected ','")) 5447 return true; 5448 5449 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5450 5451 int64_t Size; 5452 SMLoc SizeLoc = getLoc(); 5453 if (getParser().parseAbsoluteExpression(Size)) 5454 return true; 5455 if (Size < 0) 5456 return Error(SizeLoc, "size must be non-negative"); 5457 if (Size > LocalMemorySize) 5458 return Error(SizeLoc, "size is too large"); 5459 5460 int64_t Alignment = 4; 5461 if (trySkipToken(AsmToken::Comma)) { 5462 SMLoc AlignLoc = getLoc(); 5463 if 
(getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10 has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) &&
           getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
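  // (applyMnemonicAliases is generated by TableGen from MnemonicAlias
  // definitions; Name is left unchanged when no alias applies.)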
5676 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5677 5678 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5679 5680 bool IsMIMG = Name.startswith("image_"); 5681 5682 while (!trySkipToken(AsmToken::EndOfStatement)) { 5683 OperandMode Mode = OperandMode_Default; 5684 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5685 Mode = OperandMode_NSA; 5686 CPolSeen = 0; 5687 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5688 5689 if (Res != MatchOperand_Success) { 5690 checkUnsupportedInstruction(Name, NameLoc); 5691 if (!Parser.hasPendingError()) { 5692 // FIXME: use real operand location rather than the current location. 5693 StringRef Msg = 5694 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5695 "not a valid operand."; 5696 Error(getLoc(), Msg); 5697 } 5698 while (!trySkipToken(AsmToken::EndOfStatement)) { 5699 lex(); 5700 } 5701 return true; 5702 } 5703 5704 // Eat the comma or space if there is one. 5705 trySkipToken(AsmToken::Comma); 5706 } 5707 5708 return false; 5709 } 5710 5711 //===----------------------------------------------------------------------===// 5712 // Utility functions 5713 //===----------------------------------------------------------------------===// 5714 5715 OperandMatchResultTy 5716 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5717 5718 if (!trySkipId(Prefix, AsmToken::Colon)) 5719 return MatchOperand_NoMatch; 5720 5721 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5722 } 5723 5724 OperandMatchResultTy 5725 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5726 AMDGPUOperand::ImmTy ImmTy, 5727 bool (*ConvertResult)(int64_t&)) { 5728 SMLoc S = getLoc(); 5729 int64_t Value = 0; 5730 5731 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5732 if (Res != MatchOperand_Success) 5733 return Res; 5734 5735 if (ConvertResult && !ConvertResult(Value)) { 5736 Error(S, "invalid " + StringRef(Prefix) + " value."); 5737 } 5738 5739 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5740 return MatchOperand_Success; 5741 } 5742 5743 OperandMatchResultTy 5744 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5745 OperandVector &Operands, 5746 AMDGPUOperand::ImmTy ImmTy, 5747 bool (*ConvertResult)(int64_t&)) { 5748 SMLoc S = getLoc(); 5749 if (!trySkipId(Prefix, AsmToken::Colon)) 5750 return MatchOperand_NoMatch; 5751 5752 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5753 return MatchOperand_ParseFail; 5754 5755 unsigned Val = 0; 5756 const unsigned MaxSize = 4; 5757 5758 // FIXME: How to verify the number of elements matches the number of src 5759 // operands? 
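  // The value is a bracketed list of 0/1 flags, e.g. (illustrative):
  //   op_sel:[0,1] or neg_lo:[1,0,0]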
5760 for (int I = 0; ; ++I) { 5761 int64_t Op; 5762 SMLoc Loc = getLoc(); 5763 if (!parseExpr(Op)) 5764 return MatchOperand_ParseFail; 5765 5766 if (Op != 0 && Op != 1) { 5767 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5768 return MatchOperand_ParseFail; 5769 } 5770 5771 Val |= (Op << I); 5772 5773 if (trySkipToken(AsmToken::RBrac)) 5774 break; 5775 5776 if (I + 1 == MaxSize) { 5777 Error(getLoc(), "expected a closing square bracket"); 5778 return MatchOperand_ParseFail; 5779 } 5780 5781 if (!skipToken(AsmToken::Comma, "expected a comma")) 5782 return MatchOperand_ParseFail; 5783 } 5784 5785 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5786 return MatchOperand_Success; 5787 } 5788 5789 OperandMatchResultTy 5790 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5791 AMDGPUOperand::ImmTy ImmTy) { 5792 int64_t Bit; 5793 SMLoc S = getLoc(); 5794 5795 if (trySkipId(Name)) { 5796 Bit = 1; 5797 } else if (trySkipId("no", Name)) { 5798 Bit = 0; 5799 } else { 5800 return MatchOperand_NoMatch; 5801 } 5802 5803 if (Name == "r128" && !hasMIMG_R128()) { 5804 Error(S, "r128 modifier is not supported on this GPU"); 5805 return MatchOperand_ParseFail; 5806 } 5807 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5808 Error(S, "a16 modifier is not supported on this GPU"); 5809 return MatchOperand_ParseFail; 5810 } 5811 5812 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5813 ImmTy = AMDGPUOperand::ImmTyR128A16; 5814 5815 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5816 return MatchOperand_Success; 5817 } 5818 5819 OperandMatchResultTy 5820 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5821 unsigned CPolOn = 0; 5822 unsigned CPolOff = 0; 5823 SMLoc S = getLoc(); 5824 5825 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5826 if (isGFX940() && !Mnemo.startswith("s_")) { 5827 if (trySkipId("sc0")) 5828 CPolOn = AMDGPU::CPol::SC0; 5829 else if (trySkipId("nosc0")) 5830 CPolOff = AMDGPU::CPol::SC0; 5831 else if (trySkipId("nt")) 5832 CPolOn = AMDGPU::CPol::NT; 5833 else if (trySkipId("nont")) 5834 CPolOff = AMDGPU::CPol::NT; 5835 else if (trySkipId("sc1")) 5836 CPolOn = AMDGPU::CPol::SC1; 5837 else if (trySkipId("nosc1")) 5838 CPolOff = AMDGPU::CPol::SC1; 5839 else 5840 return MatchOperand_NoMatch; 5841 } 5842 else if (trySkipId("glc")) 5843 CPolOn = AMDGPU::CPol::GLC; 5844 else if (trySkipId("noglc")) 5845 CPolOff = AMDGPU::CPol::GLC; 5846 else if (trySkipId("slc")) 5847 CPolOn = AMDGPU::CPol::SLC; 5848 else if (trySkipId("noslc")) 5849 CPolOff = AMDGPU::CPol::SLC; 5850 else if (trySkipId("dlc")) 5851 CPolOn = AMDGPU::CPol::DLC; 5852 else if (trySkipId("nodlc")) 5853 CPolOff = AMDGPU::CPol::DLC; 5854 else if (trySkipId("scc")) 5855 CPolOn = AMDGPU::CPol::SCC; 5856 else if (trySkipId("noscc")) 5857 CPolOff = AMDGPU::CPol::SCC; 5858 else 5859 return MatchOperand_NoMatch; 5860 5861 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5862 Error(S, "dlc modifier is not supported on this GPU"); 5863 return MatchOperand_ParseFail; 5864 } 5865 5866 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5867 Error(S, "scc modifier is not supported on this GPU"); 5868 return MatchOperand_ParseFail; 5869 } 5870 5871 if (CPolSeen & (CPolOn | CPolOff)) { 5872 Error(S, "duplicate cache policy modifier"); 5873 return MatchOperand_ParseFail; 5874 } 5875 5876 CPolSeen |= (CPolOn | CPolOff); 5877 5878 for (unsigned I = 1; I != Operands.size(); ++I) { 5879 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5880 if (Op.isCPol()) { 5881 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5882 return MatchOperand_Success; 5883 } 5884 } 5885 5886 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5887 AMDGPUOperand::ImmTyCPol)); 5888 5889 return MatchOperand_Success; 5890 } 5891 5892 static void addOptionalImmOperand( 5893 MCInst& Inst, const OperandVector& Operands, 5894 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5895 AMDGPUOperand::ImmTy ImmT, 5896 int64_t Default = 0) { 5897 auto i = OptionalIdx.find(ImmT); 5898 if (i != OptionalIdx.end()) { 5899 unsigned Idx = i->second; 5900 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5901 } else { 5902 Inst.addOperand(MCOperand::createImm(Default)); 5903 } 5904 } 5905 5906 OperandMatchResultTy 5907 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5908 StringRef &Value, 5909 SMLoc &StringLoc) { 5910 if (!trySkipId(Prefix, AsmToken::Colon)) 5911 return MatchOperand_NoMatch; 5912 5913 StringLoc = getLoc(); 5914 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5915 : MatchOperand_ParseFail; 5916 } 5917 5918 //===----------------------------------------------------------------------===// 5919 // MTBUF format 5920 //===----------------------------------------------------------------------===// 5921 5922 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5923 int64_t MaxVal, 5924 int64_t &Fmt) { 5925 int64_t Val; 5926 SMLoc Loc = getLoc(); 5927 5928 auto Res = parseIntWithPrefix(Pref, Val); 5929 if (Res == MatchOperand_ParseFail) 5930 return false; 5931 if (Res == MatchOperand_NoMatch) 5932 return true; 5933 5934 if (Val < 0 || Val > MaxVal) { 5935 Error(Loc, Twine("out of range ", StringRef(Pref))); 5936 return false; 5937 } 5938 5939 Fmt = Val; 5940 return true; 5941 } 5942 5943 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5944 // values to live in a joint format operand in the MCInst encoding. 5945 OperandMatchResultTy 5946 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5947 using namespace llvm::AMDGPU::MTBUFFormat; 5948 5949 int64_t Dfmt = DFMT_UNDEF; 5950 int64_t Nfmt = NFMT_UNDEF; 5951 5952 // dfmt and nfmt can appear in either order, and each is optional. 5953 for (int I = 0; I < 2; ++I) { 5954 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5955 return MatchOperand_ParseFail; 5956 5957 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5958 return MatchOperand_ParseFail; 5959 } 5960 // Skip optional comma between dfmt/nfmt 5961 // but guard against 2 commas following each other. 5962 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5963 !peekToken().is(AsmToken::Comma)) { 5964 trySkipToken(AsmToken::Comma); 5965 } 5966 } 5967 5968 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5969 return MatchOperand_NoMatch; 5970 5971 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5972 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5973 5974 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5975 return MatchOperand_Success; 5976 } 5977 5978 OperandMatchResultTy 5979 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5980 using namespace llvm::AMDGPU::MTBUFFormat; 5981 5982 int64_t Fmt = UFMT_UNDEF; 5983 5984 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5985 return MatchOperand_ParseFail; 5986 5987 if (Fmt == UFMT_UNDEF) 5988 return MatchOperand_NoMatch; 5989 5990 Format = Fmt; 5991 return MatchOperand_Success; 5992 } 5993 5994 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5995 int64_t &Nfmt, 5996 StringRef FormatStr, 5997 SMLoc Loc) { 5998 using namespace llvm::AMDGPU::MTBUFFormat; 5999 int64_t Format; 6000 6001 Format = getDfmt(FormatStr); 6002 if (Format != DFMT_UNDEF) { 6003 Dfmt = Format; 6004 return true; 6005 } 6006 6007 Format = getNfmt(FormatStr, getSTI()); 6008 if (Format != NFMT_UNDEF) { 6009 Nfmt = Format; 6010 return true; 6011 } 6012 6013 Error(Loc, "unsupported format"); 6014 return false; 6015 } 6016 6017 OperandMatchResultTy 6018 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 6019 SMLoc FormatLoc, 6020 int64_t &Format) { 6021 using namespace llvm::AMDGPU::MTBUFFormat; 6022 6023 int64_t Dfmt = DFMT_UNDEF; 6024 int64_t Nfmt = NFMT_UNDEF; 6025 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 6026 return MatchOperand_ParseFail; 6027 6028 if (trySkipToken(AsmToken::Comma)) { 6029 StringRef Str; 6030 SMLoc Loc = getLoc(); 6031 if (!parseId(Str, "expected a format string") || 6032 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 6033 return MatchOperand_ParseFail; 6034 } 6035 if (Dfmt == DFMT_UNDEF) { 6036 Error(Loc, "duplicate numeric format"); 6037 return MatchOperand_ParseFail; 6038 } else if (Nfmt == NFMT_UNDEF) { 6039 Error(Loc, "duplicate data format"); 6040 return MatchOperand_ParseFail; 6041 } 6042 } 6043 6044 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6045 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6046 6047 if (isGFX10Plus()) { 6048 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6049 if (Ufmt == UFMT_UNDEF) { 6050 Error(FormatLoc, "unsupported format"); 6051 return MatchOperand_ParseFail; 6052 } 6053 Format = Ufmt; 6054 } else { 6055 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6056 } 6057 6058 return MatchOperand_Success; 6059 } 6060 6061 OperandMatchResultTy 6062 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6063 SMLoc Loc, 6064 int64_t &Format) { 6065 using namespace llvm::AMDGPU::MTBUFFormat; 6066 6067 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6068 if (Id == UFMT_UNDEF) 6069 return MatchOperand_NoMatch; 6070 6071 if (!isGFX10Plus()) { 6072 Error(Loc, "unified format is not supported on this GPU"); 6073 return MatchOperand_ParseFail; 6074 } 6075 6076 Format = Id; 6077 return MatchOperand_Success; 6078 } 6079 6080 OperandMatchResultTy 6081 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6082 using namespace llvm::AMDGPU::MTBUFFormat; 6083 SMLoc Loc = getLoc(); 6084 6085 if (!parseExpr(Format)) 6086 return MatchOperand_ParseFail; 6087 if (!isValidFormatEncoding(Format, getSTI())) { 6088 Error(Loc, "out of range format"); 6089 return MatchOperand_ParseFail; 6090 } 6091 6092 return MatchOperand_Success; 6093 } 6094 6095 OperandMatchResultTy 6096 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6097 using namespace llvm::AMDGPU::MTBUFFormat; 6098 6099 if (!trySkipId("format", AsmToken::Colon)) 6100 return MatchOperand_NoMatch; 6101 6102 if (trySkipToken(AsmToken::LBrac)) { 6103 StringRef FormatStr; 6104 SMLoc Loc = getLoc(); 6105 if (!parseId(FormatStr, "expected a format string")) 6106 return MatchOperand_ParseFail; 6107 6108 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6109 if (Res == MatchOperand_NoMatch) 6110 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6111 if (Res != MatchOperand_Success) 6112 return Res; 6113 6114 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6115 return MatchOperand_ParseFail; 6116 6117 return MatchOperand_Success; 6118 } 6119 6120 return parseNumericFormat(Format); 6121 } 6122 6123 OperandMatchResultTy 6124 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6125 using namespace llvm::AMDGPU::MTBUFFormat; 6126 6127 int64_t Format = getDefaultFormatEncoding(getSTI()); 6128 OperandMatchResultTy Res; 6129 SMLoc Loc = getLoc(); 6130 6131 // Parse legacy format syntax. 6132 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6133 if (Res == MatchOperand_ParseFail) 6134 return Res; 6135 6136 bool FormatFound = (Res == MatchOperand_Success); 6137 6138 Operands.push_back( 6139 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6140 6141 if (FormatFound) 6142 trySkipToken(AsmToken::Comma); 6143 6144 if (isToken(AsmToken::EndOfStatement)) { 6145 // We are expecting an soffset operand, 6146 // but let matcher handle the error. 6147 return MatchOperand_Success; 6148 } 6149 6150 // Parse soffset. 
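  // soffset may be a register or an immediate; a symbolic format such as
  // format:[BUF_DATA_FORMAT_32,BUF_NUM_FORMAT_FLOAT] (illustrative) may still
  // follow it and is patched into the FORMAT operand below.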
6151 Res = parseRegOrImm(Operands); 6152 if (Res != MatchOperand_Success) 6153 return Res; 6154 6155 trySkipToken(AsmToken::Comma); 6156 6157 if (!FormatFound) { 6158 Res = parseSymbolicOrNumericFormat(Format); 6159 if (Res == MatchOperand_ParseFail) 6160 return Res; 6161 if (Res == MatchOperand_Success) { 6162 auto Size = Operands.size(); 6163 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6164 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6165 Op.setImm(Format); 6166 } 6167 return MatchOperand_Success; 6168 } 6169 6170 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6171 Error(getLoc(), "duplicate format"); 6172 return MatchOperand_ParseFail; 6173 } 6174 return MatchOperand_Success; 6175 } 6176 6177 //===----------------------------------------------------------------------===// 6178 // ds 6179 //===----------------------------------------------------------------------===// 6180 6181 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6182 const OperandVector &Operands) { 6183 OptionalImmIndexMap OptionalIdx; 6184 6185 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6186 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6187 6188 // Add the register arguments 6189 if (Op.isReg()) { 6190 Op.addRegOperands(Inst, 1); 6191 continue; 6192 } 6193 6194 // Handle optional arguments 6195 OptionalIdx[Op.getImmTy()] = i; 6196 } 6197 6198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6199 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6200 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6201 6202 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6203 } 6204 6205 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6206 bool IsGdsHardcoded) { 6207 OptionalImmIndexMap OptionalIdx; 6208 6209 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6210 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6211 6212 // Add the register arguments 6213 if (Op.isReg()) { 6214 Op.addRegOperands(Inst, 1); 6215 continue; 6216 } 6217 6218 if (Op.isToken() && Op.getToken() == "gds") { 6219 IsGdsHardcoded = true; 6220 continue; 6221 } 6222 6223 // Handle optional arguments 6224 OptionalIdx[Op.getImmTy()] = i; 6225 } 6226 6227 AMDGPUOperand::ImmTy OffsetType = 6228 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6229 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6230 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6231 AMDGPUOperand::ImmTyOffset; 6232 6233 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6234 6235 if (!IsGdsHardcoded) { 6236 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6237 } 6238 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6239 } 6240 6241 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6242 OptionalImmIndexMap OptionalIdx; 6243 6244 unsigned OperandIdx[4]; 6245 unsigned EnMask = 0; 6246 int SrcIdx = 0; 6247 6248 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6249 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6250 6251 // Add the register arguments 6252 if (Op.isReg()) { 6253 assert(SrcIdx < 4); 6254 OperandIdx[SrcIdx] = Inst.size(); 6255 Op.addRegOperands(Inst, 1); 6256 ++SrcIdx; 6257 continue; 6258 } 6259 6260 if (Op.isOff()) { 6261 assert(SrcIdx < 4); 6262 OperandIdx[SrcIdx] = Inst.size(); 6263 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6264 ++SrcIdx; 6265 continue; 6266 } 6267 6268 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6269 Op.addImmOperands(Inst, 1); 6270 continue; 6271 } 6272 6273 if (Op.isToken() && Op.getToken() == "done") 6274 continue; 6275 6276 // Handle optional arguments 6277 OptionalIdx[Op.getImmTy()] = i; 6278 } 6279 6280 assert(SrcIdx == 4); 6281 6282 bool Compr = false; 6283 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6284 Compr = true; 6285 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6286 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6287 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6288 } 6289 6290 for (auto i = 0; i < SrcIdx; ++i) { 6291 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6292 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6293 } 6294 } 6295 6296 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6297 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6298 6299 Inst.addOperand(MCOperand::createImm(EnMask)); 6300 } 6301 6302 //===----------------------------------------------------------------------===// 6303 // s_waitcnt 6304 //===----------------------------------------------------------------------===// 6305 6306 static bool 6307 encodeCnt( 6308 const AMDGPU::IsaVersion ISA, 6309 int64_t &IntVal, 6310 int64_t CntVal, 6311 bool Saturate, 6312 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6313 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6314 { 6315 bool Failed = false; 6316 6317 IntVal = encode(ISA, IntVal, CntVal); 6318 if (CntVal != decode(ISA, IntVal)) { 6319 if (Saturate) { 6320 IntVal = encode(ISA, IntVal, -1); 6321 } else { 6322 Failed = true; 6323 } 6324 } 6325 return Failed; 6326 } 6327 6328 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6329 6330 SMLoc CntLoc = getLoc(); 6331 StringRef CntName = getTokenStr(); 6332 6333 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6334 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6335 return false; 6336 6337 int64_t CntVal; 6338 SMLoc ValLoc = getLoc(); 6339 if (!parseExpr(CntVal)) 6340 return false; 6341 6342 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6343 6344 bool Failed = true; 6345 bool Sat = CntName.endswith("_sat"); 6346 6347 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6348 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6349 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6350 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6351 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6352 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6353 } else { 6354 Error(CntLoc, "invalid counter name " + CntName); 6355 return false; 6356 } 6357 6358 if (Failed) { 6359 Error(ValLoc, "too large value for " + CntName); 6360 return false; 6361 } 6362 6363 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6364 return false; 6365 6366 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6367 if (isToken(AsmToken::EndOfStatement)) { 6368 Error(getLoc(), "expected a counter name"); 6369 return false; 6370 } 6371 } 6372 6373 return true; 6374 } 6375 6376 OperandMatchResultTy 6377 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6378 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6379 int64_t Waitcnt = getWaitcntBitMask(ISA); 6380 SMLoc S = getLoc(); 6381 6382 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6383 while (!isToken(AsmToken::EndOfStatement)) { 6384 if (!parseCnt(Waitcnt)) 6385 return MatchOperand_ParseFail; 6386 } 6387 } else { 6388 if (!parseExpr(Waitcnt)) 6389 return MatchOperand_ParseFail; 6390 } 6391 6392 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6393 return MatchOperand_Success; 6394 } 6395 6396 bool AMDGPUAsmParser::parseDelay(int64_t &Delay) { 6397 SMLoc FieldLoc = getLoc(); 6398 StringRef FieldName = getTokenStr(); 6399 if (!skipToken(AsmToken::Identifier, "expected a field name") || 6400 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6401 return false; 6402 6403 SMLoc ValueLoc = getLoc(); 6404 StringRef ValueName = getTokenStr(); 6405 if 
(!skipToken(AsmToken::Identifier, "expected a value name") || 6406 !skipToken(AsmToken::RParen, "expected a right parenthesis")) 6407 return false; 6408 6409 unsigned Shift; 6410 if (FieldName == "instid0") { 6411 Shift = 0; 6412 } else if (FieldName == "instskip") { 6413 Shift = 4; 6414 } else if (FieldName == "instid1") { 6415 Shift = 7; 6416 } else { 6417 Error(FieldLoc, "invalid field name " + FieldName); 6418 return false; 6419 } 6420 6421 int Value; 6422 if (Shift == 4) { 6423 // Parse values for instskip. 6424 Value = StringSwitch<int>(ValueName) 6425 .Case("SAME", 0) 6426 .Case("NEXT", 1) 6427 .Case("SKIP_1", 2) 6428 .Case("SKIP_2", 3) 6429 .Case("SKIP_3", 4) 6430 .Case("SKIP_4", 5) 6431 .Default(-1); 6432 } else { 6433 // Parse values for instid0 and instid1. 6434 Value = StringSwitch<int>(ValueName) 6435 .Case("NO_DEP", 0) 6436 .Case("VALU_DEP_1", 1) 6437 .Case("VALU_DEP_2", 2) 6438 .Case("VALU_DEP_3", 3) 6439 .Case("VALU_DEP_4", 4) 6440 .Case("TRANS32_DEP_1", 5) 6441 .Case("TRANS32_DEP_2", 6) 6442 .Case("TRANS32_DEP_3", 7) 6443 .Case("FMA_ACCUM_CYCLE_1", 8) 6444 .Case("SALU_CYCLE_1", 9) 6445 .Case("SALU_CYCLE_2", 10) 6446 .Case("SALU_CYCLE_3", 11) 6447 .Default(-1); 6448 } 6449 if (Value < 0) { 6450 Error(ValueLoc, "invalid value name " + ValueName); 6451 return false; 6452 } 6453 6454 Delay |= Value << Shift; 6455 return true; 6456 } 6457 6458 OperandMatchResultTy 6459 AMDGPUAsmParser::parseSDelayAluOps(OperandVector &Operands) { 6460 int64_t Delay = 0; 6461 SMLoc S = getLoc(); 6462 6463 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6464 do { 6465 if (!parseDelay(Delay)) 6466 return MatchOperand_ParseFail; 6467 } while (trySkipToken(AsmToken::Pipe)); 6468 } else { 6469 if (!parseExpr(Delay)) 6470 return MatchOperand_ParseFail; 6471 } 6472 6473 Operands.push_back(AMDGPUOperand::CreateImm(this, Delay, S)); 6474 return MatchOperand_Success; 6475 } 6476 6477 bool 6478 AMDGPUOperand::isSWaitCnt() const { 6479 return isImm(); 6480 } 6481 6482 bool AMDGPUOperand::isSDelayAlu() const { return isImm(); } 6483 6484 //===----------------------------------------------------------------------===// 6485 // DepCtr 6486 //===----------------------------------------------------------------------===// 6487 6488 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6489 StringRef DepCtrName) { 6490 switch (ErrorId) { 6491 case OPR_ID_UNKNOWN: 6492 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6493 return; 6494 case OPR_ID_UNSUPPORTED: 6495 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6496 return; 6497 case OPR_ID_DUPLICATE: 6498 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6499 return; 6500 case OPR_VAL_INVALID: 6501 Error(Loc, Twine("invalid value for ", DepCtrName)); 6502 return; 6503 default: 6504 assert(false); 6505 } 6506 } 6507 6508 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6509 6510 using namespace llvm::AMDGPU::DepCtr; 6511 6512 SMLoc DepCtrLoc = getLoc(); 6513 StringRef DepCtrName = getTokenStr(); 6514 6515 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6516 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6517 return false; 6518 6519 int64_t ExprVal; 6520 if (!parseExpr(ExprVal)) 6521 return false; 6522 6523 unsigned PrevOprMask = UsedOprMask; 6524 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6525 6526 if (CntVal < 0) { 6527 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6528 return false; 6529 } 6530 6531 if 
(!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6532 return false; 6533 6534 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6535 if (isToken(AsmToken::EndOfStatement)) { 6536 Error(getLoc(), "expected a counter name"); 6537 return false; 6538 } 6539 } 6540 6541 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6542 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6543 return true; 6544 } 6545 6546 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6547 using namespace llvm::AMDGPU::DepCtr; 6548 6549 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6550 SMLoc Loc = getLoc(); 6551 6552 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6553 unsigned UsedOprMask = 0; 6554 while (!isToken(AsmToken::EndOfStatement)) { 6555 if (!parseDepCtr(DepCtr, UsedOprMask)) 6556 return MatchOperand_ParseFail; 6557 } 6558 } else { 6559 if (!parseExpr(DepCtr)) 6560 return MatchOperand_ParseFail; 6561 } 6562 6563 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6564 return MatchOperand_Success; 6565 } 6566 6567 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6568 6569 //===----------------------------------------------------------------------===// 6570 // hwreg 6571 //===----------------------------------------------------------------------===// 6572 6573 bool 6574 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6575 OperandInfoTy &Offset, 6576 OperandInfoTy &Width) { 6577 using namespace llvm::AMDGPU::Hwreg; 6578 6579 // The register may be specified by name or using a numeric code 6580 HwReg.Loc = getLoc(); 6581 if (isToken(AsmToken::Identifier) && 6582 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6583 HwReg.IsSymbolic = true; 6584 lex(); // skip register name 6585 } else if (!parseExpr(HwReg.Id, "a register name")) { 6586 return false; 6587 } 6588 6589 if (trySkipToken(AsmToken::RParen)) 6590 return true; 6591 6592 // parse optional params 6593 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6594 return false; 6595 6596 Offset.Loc = getLoc(); 6597 if (!parseExpr(Offset.Id)) 6598 return false; 6599 6600 if (!skipToken(AsmToken::Comma, "expected a comma")) 6601 return false; 6602 6603 Width.Loc = getLoc(); 6604 return parseExpr(Width.Id) && 6605 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6606 } 6607 6608 bool 6609 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6610 const OperandInfoTy &Offset, 6611 const OperandInfoTy &Width) { 6612 6613 using namespace llvm::AMDGPU::Hwreg; 6614 6615 if (HwReg.IsSymbolic) { 6616 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6617 Error(HwReg.Loc, 6618 "specified hardware register is not supported on this GPU"); 6619 return false; 6620 } 6621 } else { 6622 if (!isValidHwreg(HwReg.Id)) { 6623 Error(HwReg.Loc, 6624 "invalid code of hardware register: only 6-bit values are legal"); 6625 return false; 6626 } 6627 } 6628 if (!isValidHwregOffset(Offset.Id)) { 6629 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6630 return false; 6631 } 6632 if (!isValidHwregWidth(Width.Id)) { 6633 Error(Width.Loc, 6634 "invalid bitfield width: only values from 1 to 32 are legal"); 6635 return false; 6636 } 6637 return true; 6638 } 6639 6640 OperandMatchResultTy 6641 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6642 using namespace llvm::AMDGPU::Hwreg; 6643 6644 int64_t ImmVal = 0; 6645 SMLoc Loc = getLoc(); 6646 6647 if (trySkipId("hwreg", AsmToken::LParen)) { 6648 
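    // Symbolic form, e.g. (illustrative): hwreg(HW_REG_TRAPSTS, 0, 32).
    // The register may also be given as a numeric code; offset and width
    // are optional.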
OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6649 OperandInfoTy Offset(OFFSET_DEFAULT_); 6650 OperandInfoTy Width(WIDTH_DEFAULT_); 6651 if (parseHwregBody(HwReg, Offset, Width) && 6652 validateHwreg(HwReg, Offset, Width)) { 6653 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6654 } else { 6655 return MatchOperand_ParseFail; 6656 } 6657 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6658 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6659 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6660 return MatchOperand_ParseFail; 6661 } 6662 } else { 6663 return MatchOperand_ParseFail; 6664 } 6665 6666 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6667 return MatchOperand_Success; 6668 } 6669 6670 bool AMDGPUOperand::isHwreg() const { 6671 return isImmTy(ImmTyHwreg); 6672 } 6673 6674 //===----------------------------------------------------------------------===// 6675 // sendmsg 6676 //===----------------------------------------------------------------------===// 6677 6678 bool 6679 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6680 OperandInfoTy &Op, 6681 OperandInfoTy &Stream) { 6682 using namespace llvm::AMDGPU::SendMsg; 6683 6684 Msg.Loc = getLoc(); 6685 if (isToken(AsmToken::Identifier) && 6686 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6687 Msg.IsSymbolic = true; 6688 lex(); // skip message name 6689 } else if (!parseExpr(Msg.Id, "a message name")) { 6690 return false; 6691 } 6692 6693 if (trySkipToken(AsmToken::Comma)) { 6694 Op.IsDefined = true; 6695 Op.Loc = getLoc(); 6696 if (isToken(AsmToken::Identifier) && 6697 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6698 lex(); // skip operation name 6699 } else if (!parseExpr(Op.Id, "an operation name")) { 6700 return false; 6701 } 6702 6703 if (trySkipToken(AsmToken::Comma)) { 6704 Stream.IsDefined = true; 6705 Stream.Loc = getLoc(); 6706 if (!parseExpr(Stream.Id)) 6707 return false; 6708 } 6709 } 6710 6711 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6712 } 6713 6714 bool 6715 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6716 const OperandInfoTy &Op, 6717 const OperandInfoTy &Stream) { 6718 using namespace llvm::AMDGPU::SendMsg; 6719 6720 // Validation strictness depends on whether message is specified 6721 // in a symbolic or in a numeric form. In the latter case 6722 // only encoding possibility is checked. 
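  // For example (illustrative), sendmsg(MSG_GS_DONE, GS_OP_NOP) is validated
  // strictly, while a numeric form such as sendmsg(2, 1) only needs to be
  // encodable.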
6723 bool Strict = Msg.IsSymbolic; 6724 6725 if (Strict) { 6726 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6727 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6728 return false; 6729 } 6730 } else { 6731 if (!isValidMsgId(Msg.Id, getSTI())) { 6732 Error(Msg.Loc, "invalid message id"); 6733 return false; 6734 } 6735 } 6736 if (Strict && (msgRequiresOp(Msg.Id, getSTI()) != Op.IsDefined)) { 6737 if (Op.IsDefined) { 6738 Error(Op.Loc, "message does not support operations"); 6739 } else { 6740 Error(Msg.Loc, "missing message operation"); 6741 } 6742 return false; 6743 } 6744 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6745 Error(Op.Loc, "invalid operation id"); 6746 return false; 6747 } 6748 if (Strict && !msgSupportsStream(Msg.Id, Op.Id, getSTI()) && 6749 Stream.IsDefined) { 6750 Error(Stream.Loc, "message operation does not support streams"); 6751 return false; 6752 } 6753 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6754 Error(Stream.Loc, "invalid message stream id"); 6755 return false; 6756 } 6757 return true; 6758 } 6759 6760 OperandMatchResultTy 6761 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6762 using namespace llvm::AMDGPU::SendMsg; 6763 6764 int64_t ImmVal = 0; 6765 SMLoc Loc = getLoc(); 6766 6767 if (trySkipId("sendmsg", AsmToken::LParen)) { 6768 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6769 OperandInfoTy Op(OP_NONE_); 6770 OperandInfoTy Stream(STREAM_ID_NONE_); 6771 if (parseSendMsgBody(Msg, Op, Stream) && 6772 validateSendMsg(Msg, Op, Stream)) { 6773 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6774 } else { 6775 return MatchOperand_ParseFail; 6776 } 6777 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6778 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6779 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6780 return MatchOperand_ParseFail; 6781 } 6782 } else { 6783 return MatchOperand_ParseFail; 6784 } 6785 6786 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6787 return MatchOperand_Success; 6788 } 6789 6790 bool AMDGPUOperand::isSendMsg() const { 6791 return isImmTy(ImmTySendMsg); 6792 } 6793 6794 //===----------------------------------------------------------------------===// 6795 // v_interp 6796 //===----------------------------------------------------------------------===// 6797 6798 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6799 StringRef Str; 6800 SMLoc S = getLoc(); 6801 6802 if (!parseId(Str)) 6803 return MatchOperand_NoMatch; 6804 6805 int Slot = StringSwitch<int>(Str) 6806 .Case("p10", 0) 6807 .Case("p20", 1) 6808 .Case("p0", 2) 6809 .Default(-1); 6810 6811 if (Slot == -1) { 6812 Error(S, "invalid interpolation slot"); 6813 return MatchOperand_ParseFail; 6814 } 6815 6816 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6817 AMDGPUOperand::ImmTyInterpSlot)); 6818 return MatchOperand_Success; 6819 } 6820 6821 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6822 StringRef Str; 6823 SMLoc S = getLoc(); 6824 6825 if (!parseId(Str)) 6826 return MatchOperand_NoMatch; 6827 6828 if (!Str.startswith("attr")) { 6829 Error(S, "invalid interpolation attribute"); 6830 return MatchOperand_ParseFail; 6831 } 6832 6833 StringRef Chan = Str.take_back(2); 6834 int AttrChan = StringSwitch<int>(Chan) 6835 .Case(".x", 0) 6836 .Case(".y", 1) 6837 .Case(".z", 2) 6838 .Case(".w", 3) 6839 .Default(-1); 6840 if (AttrChan == -1) { 6841 Error(S, "invalid or missing interpolation attribute channel"); 
6842 return MatchOperand_ParseFail; 6843 } 6844 6845 Str = Str.drop_back(2).drop_front(4); 6846 6847 uint8_t Attr; 6848 if (Str.getAsInteger(10, Attr)) { 6849 Error(S, "invalid or missing interpolation attribute number"); 6850 return MatchOperand_ParseFail; 6851 } 6852 6853 if (Attr > 63) { 6854 Error(S, "out of bounds interpolation attribute number"); 6855 return MatchOperand_ParseFail; 6856 } 6857 6858 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6859 6860 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6861 AMDGPUOperand::ImmTyInterpAttr)); 6862 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6863 AMDGPUOperand::ImmTyAttrChan)); 6864 return MatchOperand_Success; 6865 } 6866 6867 //===----------------------------------------------------------------------===// 6868 // exp 6869 //===----------------------------------------------------------------------===// 6870 6871 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6872 using namespace llvm::AMDGPU::Exp; 6873 6874 StringRef Str; 6875 SMLoc S = getLoc(); 6876 6877 if (!parseId(Str)) 6878 return MatchOperand_NoMatch; 6879 6880 unsigned Id = getTgtId(Str); 6881 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6882 Error(S, (Id == ET_INVALID) ? 6883 "invalid exp target" : 6884 "exp target is not supported on this GPU"); 6885 return MatchOperand_ParseFail; 6886 } 6887 6888 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6889 AMDGPUOperand::ImmTyExpTgt)); 6890 return MatchOperand_Success; 6891 } 6892 6893 //===----------------------------------------------------------------------===// 6894 // parser helpers 6895 //===----------------------------------------------------------------------===// 6896 6897 bool 6898 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6899 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6900 } 6901 6902 bool 6903 AMDGPUAsmParser::isId(const StringRef Id) const { 6904 return isId(getToken(), Id); 6905 } 6906 6907 bool 6908 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6909 return getTokenKind() == Kind; 6910 } 6911 6912 bool 6913 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6914 if (isId(Id)) { 6915 lex(); 6916 return true; 6917 } 6918 return false; 6919 } 6920 6921 bool 6922 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6923 if (isToken(AsmToken::Identifier)) { 6924 StringRef Tok = getTokenStr(); 6925 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6926 lex(); 6927 return true; 6928 } 6929 } 6930 return false; 6931 } 6932 6933 bool 6934 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6935 if (isId(Id) && peekToken().is(Kind)) { 6936 lex(); 6937 lex(); 6938 return true; 6939 } 6940 return false; 6941 } 6942 6943 bool 6944 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6945 if (isToken(Kind)) { 6946 lex(); 6947 return true; 6948 } 6949 return false; 6950 } 6951 6952 bool 6953 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6954 const StringRef ErrMsg) { 6955 if (!trySkipToken(Kind)) { 6956 Error(getLoc(), ErrMsg); 6957 return false; 6958 } 6959 return true; 6960 } 6961 6962 bool 6963 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6964 SMLoc S = getLoc(); 6965 6966 const MCExpr *Expr; 6967 if (Parser.parseExpression(Expr)) 6968 return false; 6969 6970 if (Expr->evaluateAsAbsolute(Imm)) 6971 return true; 6972 6973 if (Expected.empty()) { 6974 Error(S, "expected 
absolute expression"); 6975 } else { 6976 Error(S, Twine("expected ", Expected) + 6977 Twine(" or an absolute expression")); 6978 } 6979 return false; 6980 } 6981 6982 bool 6983 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6984 SMLoc S = getLoc(); 6985 6986 const MCExpr *Expr; 6987 if (Parser.parseExpression(Expr)) 6988 return false; 6989 6990 int64_t IntVal; 6991 if (Expr->evaluateAsAbsolute(IntVal)) { 6992 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6993 } else { 6994 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6995 } 6996 return true; 6997 } 6998 6999 bool 7000 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 7001 if (isToken(AsmToken::String)) { 7002 Val = getToken().getStringContents(); 7003 lex(); 7004 return true; 7005 } else { 7006 Error(getLoc(), ErrMsg); 7007 return false; 7008 } 7009 } 7010 7011 bool 7012 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 7013 if (isToken(AsmToken::Identifier)) { 7014 Val = getTokenStr(); 7015 lex(); 7016 return true; 7017 } else { 7018 if (!ErrMsg.empty()) 7019 Error(getLoc(), ErrMsg); 7020 return false; 7021 } 7022 } 7023 7024 AsmToken 7025 AMDGPUAsmParser::getToken() const { 7026 return Parser.getTok(); 7027 } 7028 7029 AsmToken 7030 AMDGPUAsmParser::peekToken() { 7031 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 7032 } 7033 7034 void 7035 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 7036 auto TokCount = getLexer().peekTokens(Tokens); 7037 7038 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 7039 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 7040 } 7041 7042 AsmToken::TokenKind 7043 AMDGPUAsmParser::getTokenKind() const { 7044 return getLexer().getKind(); 7045 } 7046 7047 SMLoc 7048 AMDGPUAsmParser::getLoc() const { 7049 return getToken().getLoc(); 7050 } 7051 7052 StringRef 7053 AMDGPUAsmParser::getTokenStr() const { 7054 return getToken().getString(); 7055 } 7056 7057 void 7058 AMDGPUAsmParser::lex() { 7059 Parser.Lex(); 7060 } 7061 7062 SMLoc 7063 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 7064 const OperandVector &Operands) const { 7065 for (unsigned i = Operands.size() - 1; i > 0; --i) { 7066 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7067 if (Test(Op)) 7068 return Op.getStartLoc(); 7069 } 7070 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 7071 } 7072 7073 SMLoc 7074 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 7075 const OperandVector &Operands) const { 7076 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 7077 return getOperandLoc(Test, Operands); 7078 } 7079 7080 SMLoc 7081 AMDGPUAsmParser::getRegLoc(unsigned Reg, 7082 const OperandVector &Operands) const { 7083 auto Test = [=](const AMDGPUOperand& Op) { 7084 return Op.isRegKind() && Op.getReg() == Reg; 7085 }; 7086 return getOperandLoc(Test, Operands); 7087 } 7088 7089 SMLoc 7090 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 7091 auto Test = [](const AMDGPUOperand& Op) { 7092 return Op.IsImmKindLiteral() || Op.isExpr(); 7093 }; 7094 return getOperandLoc(Test, Operands); 7095 } 7096 7097 SMLoc 7098 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 7099 auto Test = [](const AMDGPUOperand& Op) { 7100 return Op.isImmKindConst(); 7101 }; 7102 return getOperandLoc(Test, Operands); 7103 } 7104 7105 //===----------------------------------------------------------------------===// 7106 // swizzle 7107 
//===----------------------------------------------------------------------===// 7108 7109 LLVM_READNONE 7110 static unsigned 7111 encodeBitmaskPerm(const unsigned AndMask, 7112 const unsigned OrMask, 7113 const unsigned XorMask) { 7114 using namespace llvm::AMDGPU::Swizzle; 7115 7116 return BITMASK_PERM_ENC | 7117 (AndMask << BITMASK_AND_SHIFT) | 7118 (OrMask << BITMASK_OR_SHIFT) | 7119 (XorMask << BITMASK_XOR_SHIFT); 7120 } 7121 7122 bool 7123 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 7124 const unsigned MinVal, 7125 const unsigned MaxVal, 7126 const StringRef ErrMsg, 7127 SMLoc &Loc) { 7128 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7129 return false; 7130 } 7131 Loc = getLoc(); 7132 if (!parseExpr(Op)) { 7133 return false; 7134 } 7135 if (Op < MinVal || Op > MaxVal) { 7136 Error(Loc, ErrMsg); 7137 return false; 7138 } 7139 7140 return true; 7141 } 7142 7143 bool 7144 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7145 const unsigned MinVal, 7146 const unsigned MaxVal, 7147 const StringRef ErrMsg) { 7148 SMLoc Loc; 7149 for (unsigned i = 0; i < OpNum; ++i) { 7150 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7151 return false; 7152 } 7153 7154 return true; 7155 } 7156 7157 bool 7158 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7159 using namespace llvm::AMDGPU::Swizzle; 7160 7161 int64_t Lane[LANE_NUM]; 7162 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7163 "expected a 2-bit lane id")) { 7164 Imm = QUAD_PERM_ENC; 7165 for (unsigned I = 0; I < LANE_NUM; ++I) { 7166 Imm |= Lane[I] << (LANE_SHIFT * I); 7167 } 7168 return true; 7169 } 7170 return false; 7171 } 7172 7173 bool 7174 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7175 using namespace llvm::AMDGPU::Swizzle; 7176 7177 SMLoc Loc; 7178 int64_t GroupSize; 7179 int64_t LaneIdx; 7180 7181 if (!parseSwizzleOperand(GroupSize, 7182 2, 32, 7183 "group size must be in the interval [2,32]", 7184 Loc)) { 7185 return false; 7186 } 7187 if (!isPowerOf2_64(GroupSize)) { 7188 Error(Loc, "group size must be a power of two"); 7189 return false; 7190 } 7191 if (parseSwizzleOperand(LaneIdx, 7192 0, GroupSize - 1, 7193 "lane id must be in the interval [0,group size - 1]", 7194 Loc)) { 7195 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7196 return true; 7197 } 7198 return false; 7199 } 7200 7201 bool 7202 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7203 using namespace llvm::AMDGPU::Swizzle; 7204 7205 SMLoc Loc; 7206 int64_t GroupSize; 7207 7208 if (!parseSwizzleOperand(GroupSize, 7209 2, 32, 7210 "group size must be in the interval [2,32]", 7211 Loc)) { 7212 return false; 7213 } 7214 if (!isPowerOf2_64(GroupSize)) { 7215 Error(Loc, "group size must be a power of two"); 7216 return false; 7217 } 7218 7219 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7220 return true; 7221 } 7222 7223 bool 7224 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7225 using namespace llvm::AMDGPU::Swizzle; 7226 7227 SMLoc Loc; 7228 int64_t GroupSize; 7229 7230 if (!parseSwizzleOperand(GroupSize, 7231 1, 16, 7232 "group size must be in the interval [1,16]", 7233 Loc)) { 7234 return false; 7235 } 7236 if (!isPowerOf2_64(GroupSize)) { 7237 Error(Loc, "group size must be a power of two"); 7238 return false; 7239 } 7240 7241 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7242 return true; 7243 } 7244 7245 bool 7246 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7247 using namespace llvm::AMDGPU::Swizzle; 7248 7249 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7250 return false; 7251 } 7252 7253 StringRef Ctl; 7254 SMLoc StrLoc = getLoc(); 7255 if (!parseString(Ctl)) { 7256 return false; 7257 } 7258 if (Ctl.size() != BITMASK_WIDTH) { 7259 Error(StrLoc, "expected a 5-character mask"); 7260 return false; 7261 } 7262 7263 unsigned AndMask = 0; 7264 unsigned OrMask = 0; 7265 unsigned XorMask = 0; 7266 7267 for (size_t i = 0; i < Ctl.size(); ++i) { 7268 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7269 switch(Ctl[i]) { 7270 default: 7271 Error(StrLoc, "invalid mask"); 7272 return false; 7273 case '0': 7274 break; 7275 case '1': 7276 OrMask |= Mask; 7277 break; 7278 case 'p': 7279 AndMask |= Mask; 7280 break; 7281 case 'i': 7282 AndMask |= Mask; 7283 XorMask |= Mask; 7284 break; 7285 } 7286 } 7287 7288 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7289 return true; 7290 } 7291 7292 bool 7293 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7294 7295 SMLoc OffsetLoc = getLoc(); 7296 7297 if (!parseExpr(Imm, "a swizzle macro")) { 7298 return false; 7299 } 7300 if (!isUInt<16>(Imm)) { 7301 Error(OffsetLoc, "expected a 16-bit offset"); 7302 return false; 7303 } 7304 return true; 7305 } 7306 7307 bool 7308 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7309 using namespace llvm::AMDGPU::Swizzle; 7310 7311 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 7312 7313 SMLoc ModeLoc = getLoc(); 7314 bool Ok = false; 7315 7316 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7317 Ok = parseSwizzleQuadPerm(Imm); 7318 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7319 Ok = parseSwizzleBitmaskPerm(Imm); 7320 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7321 Ok = parseSwizzleBroadcast(Imm); 7322 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7323 Ok = parseSwizzleSwap(Imm); 7324 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7325 Ok = parseSwizzleReverse(Imm); 7326 } else { 7327 Error(ModeLoc, "expected a swizzle mode"); 7328 } 7329 7330 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 7331 } 7332 7333 return false; 7334 } 7335 7336 OperandMatchResultTy 7337 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7338 SMLoc S = getLoc(); 7339 int64_t Imm = 0; 7340 7341 if (trySkipId("offset")) { 7342 7343 bool Ok = false; 7344 if (skipToken(AsmToken::Colon, "expected a colon")) { 7345 if (trySkipId("swizzle")) { 7346 Ok = parseSwizzleMacro(Imm); 7347 } else { 7348 Ok = parseSwizzleOffset(Imm); 7349 } 7350 } 7351 7352 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7353 7354 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7355 } else { 7356 // Swizzle "offset" operand is optional. 7357 // If it is omitted, try parsing other optional operands. 
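// Illustrative (assumed example syntax, not taken from the code above):
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)   - swizzle macro path
//   ds_swizzle_b32 v8, v2 offset:0xFFFF                           - raw 16-bit offset path
// With no "offset" present at all, we fall through to the generic
// optional-operand parser below.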
7358 return parseOptionalOpr(Operands); 7359 } 7360 } 7361 7362 bool 7363 AMDGPUOperand::isSwizzle() const { 7364 return isImmTy(ImmTySwizzle); 7365 } 7366 7367 //===----------------------------------------------------------------------===// 7368 // VGPR Index Mode 7369 //===----------------------------------------------------------------------===// 7370 7371 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7372 7373 using namespace llvm::AMDGPU::VGPRIndexMode; 7374 7375 if (trySkipToken(AsmToken::RParen)) { 7376 return OFF; 7377 } 7378 7379 int64_t Imm = 0; 7380 7381 while (true) { 7382 unsigned Mode = 0; 7383 SMLoc S = getLoc(); 7384 7385 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7386 if (trySkipId(IdSymbolic[ModeId])) { 7387 Mode = 1 << ModeId; 7388 break; 7389 } 7390 } 7391 7392 if (Mode == 0) { 7393 Error(S, (Imm == 0)? 7394 "expected a VGPR index mode or a closing parenthesis" : 7395 "expected a VGPR index mode"); 7396 return UNDEF; 7397 } 7398 7399 if (Imm & Mode) { 7400 Error(S, "duplicate VGPR index mode"); 7401 return UNDEF; 7402 } 7403 Imm |= Mode; 7404 7405 if (trySkipToken(AsmToken::RParen)) 7406 break; 7407 if (!skipToken(AsmToken::Comma, 7408 "expected a comma or a closing parenthesis")) 7409 return UNDEF; 7410 } 7411 7412 return Imm; 7413 } 7414 7415 OperandMatchResultTy 7416 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7417 7418 using namespace llvm::AMDGPU::VGPRIndexMode; 7419 7420 int64_t Imm = 0; 7421 SMLoc S = getLoc(); 7422 7423 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7424 Imm = parseGPRIdxMacro(); 7425 if (Imm == UNDEF) 7426 return MatchOperand_ParseFail; 7427 } else { 7428 if (getParser().parseAbsoluteExpression(Imm)) 7429 return MatchOperand_ParseFail; 7430 if (Imm < 0 || !isUInt<4>(Imm)) { 7431 Error(S, "invalid immediate: only 4-bit values are legal"); 7432 return MatchOperand_ParseFail; 7433 } 7434 } 7435 7436 Operands.push_back( 7437 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7438 return MatchOperand_Success; 7439 } 7440 7441 bool AMDGPUOperand::isGPRIdxMode() const { 7442 return isImmTy(ImmTyGprIdxMode); 7443 } 7444 7445 //===----------------------------------------------------------------------===// 7446 // sopp branch targets 7447 //===----------------------------------------------------------------------===// 7448 7449 OperandMatchResultTy 7450 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7451 7452 // Make sure we are not parsing something 7453 // that looks like a label or an expression but is not. 7454 // This will improve error messages. 7455 if (isRegister() || isModifier()) 7456 return MatchOperand_NoMatch; 7457 7458 if (!parseExpr(Operands)) 7459 return MatchOperand_ParseFail; 7460 7461 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7462 assert(Opr.isImm() || Opr.isExpr()); 7463 SMLoc Loc = Opr.getStartLoc(); 7464 7465 // Currently we do not support arbitrary expressions as branch targets. 7466 // Only labels and absolute expressions are accepted. 
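// For illustration (assumed examples): "s_branch target_label" and "s_branch 3"
// are accepted, while a composite expression such as "s_branch target_label+4"
// is rejected below, as is an absolute value that does not fit into a signed
// 16-bit offset.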
7467 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7468 Error(Loc, "expected an absolute expression or a label"); 7469 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7470 Error(Loc, "expected a 16-bit signed jump offset"); 7471 } 7472 7473 return MatchOperand_Success; 7474 } 7475 7476 //===----------------------------------------------------------------------===// 7477 // Boolean holding registers 7478 //===----------------------------------------------------------------------===// 7479 7480 OperandMatchResultTy 7481 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7482 return parseReg(Operands); 7483 } 7484 7485 //===----------------------------------------------------------------------===// 7486 // mubuf 7487 //===----------------------------------------------------------------------===// 7488 7489 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7490 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7491 } 7492 7493 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7494 const OperandVector &Operands, 7495 bool IsAtomic, 7496 bool IsLds) { 7497 OptionalImmIndexMap OptionalIdx; 7498 unsigned FirstOperandIdx = 1; 7499 bool IsAtomicReturn = false; 7500 7501 if (IsAtomic) { 7502 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7503 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7504 if (!Op.isCPol()) 7505 continue; 7506 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7507 break; 7508 } 7509 7510 if (!IsAtomicReturn) { 7511 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7512 if (NewOpc != -1) 7513 Inst.setOpcode(NewOpc); 7514 } 7515 7516 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7517 SIInstrFlags::IsAtomicRet; 7518 } 7519 7520 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7521 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7522 7523 // Add the register arguments 7524 if (Op.isReg()) { 7525 Op.addRegOperands(Inst, 1); 7526 // Insert a tied src for atomic return dst. 7527 // This cannot be postponed as subsequent calls to 7528 // addImmOperands rely on correct number of MC operands. 7529 if (IsAtomicReturn && i == FirstOperandIdx) 7530 Op.addRegOperands(Inst, 1); 7531 continue; 7532 } 7533 7534 // Handle the case where soffset is an immediate 7535 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7536 Op.addImmOperands(Inst, 1); 7537 continue; 7538 } 7539 7540 // Handle tokens like 'offen' which are sometimes hard-coded into the 7541 // asm string. There are no MCInst operands for these. 
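// (Illustrative, assumed example: in "buffer_load_dword v0, v1, s[4:7], s0 offen",
// the trailing "offen" is part of the instruction's asm string rather than a
// separate MCInst operand, so it is simply skipped here.)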
7542 if (Op.isToken()) { 7543 continue; 7544 } 7545 assert(Op.isImm()); 7546 7547 // Handle optional arguments 7548 OptionalIdx[Op.getImmTy()] = i; 7549 } 7550 7551 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7552 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7553 7554 if (!IsLds) { // tfe is not legal with lds opcodes 7555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7556 } 7557 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7558 } 7559 7560 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7561 OptionalImmIndexMap OptionalIdx; 7562 7563 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7564 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7565 7566 // Add the register arguments 7567 if (Op.isReg()) { 7568 Op.addRegOperands(Inst, 1); 7569 continue; 7570 } 7571 7572 // Handle the case where soffset is an immediate 7573 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7574 Op.addImmOperands(Inst, 1); 7575 continue; 7576 } 7577 7578 // Handle tokens like 'offen' which are sometimes hard-coded into the 7579 // asm string. There are no MCInst operands for these. 7580 if (Op.isToken()) { 7581 continue; 7582 } 7583 assert(Op.isImm()); 7584 7585 // Handle optional arguments 7586 OptionalIdx[Op.getImmTy()] = i; 7587 } 7588 7589 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7590 AMDGPUOperand::ImmTyOffset); 7591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7592 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7593 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7594 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7595 } 7596 7597 //===----------------------------------------------------------------------===// 7598 // mimg 7599 //===----------------------------------------------------------------------===// 7600 7601 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7602 bool IsAtomic) { 7603 unsigned I = 1; 7604 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7605 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7606 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7607 } 7608 7609 if (IsAtomic) { 7610 // Add src, same as dst 7611 assert(Desc.getNumDefs() == 1); 7612 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7613 } 7614 7615 OptionalImmIndexMap OptionalIdx; 7616 7617 for (unsigned E = Operands.size(); I != E; ++I) { 7618 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7619 7620 // Add the register arguments 7621 if (Op.isReg()) { 7622 Op.addRegOperands(Inst, 1); 7623 } else if (Op.isImmModifier()) { 7624 OptionalIdx[Op.getImmTy()] = I; 7625 } else if (!Op.isToken()) { 7626 llvm_unreachable("unexpected operand type"); 7627 } 7628 } 7629 7630 bool IsGFX10Plus = isGFX10Plus(); 7631 7632 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7633 if (IsGFX10Plus) 7634 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7635 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7636 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7637 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7638 if (IsGFX10Plus) 7639 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7640 
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7641 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7642 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7643 if (!IsGFX10Plus) 7644 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7645 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7646 } 7647 7648 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7649 cvtMIMG(Inst, Operands, true); 7650 } 7651 7652 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7653 OptionalImmIndexMap OptionalIdx; 7654 bool IsAtomicReturn = false; 7655 7656 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7657 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7658 if (!Op.isCPol()) 7659 continue; 7660 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7661 break; 7662 } 7663 7664 if (!IsAtomicReturn) { 7665 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7666 if (NewOpc != -1) 7667 Inst.setOpcode(NewOpc); 7668 } 7669 7670 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7671 SIInstrFlags::IsAtomicRet; 7672 7673 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7674 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7675 7676 // Add the register arguments 7677 if (Op.isReg()) { 7678 Op.addRegOperands(Inst, 1); 7679 if (IsAtomicReturn && i == 1) 7680 Op.addRegOperands(Inst, 1); 7681 continue; 7682 } 7683 7684 // Handle the case where soffset is an immediate 7685 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7686 Op.addImmOperands(Inst, 1); 7687 continue; 7688 } 7689 7690 // Handle tokens like 'offen' which are sometimes hard-coded into the 7691 // asm string. There are no MCInst operands for these. 7692 if (Op.isToken()) { 7693 continue; 7694 } 7695 assert(Op.isImm()); 7696 7697 // Handle optional arguments 7698 OptionalIdx[Op.getImmTy()] = i; 7699 } 7700 7701 if ((int)Inst.getNumOperands() <= 7702 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7703 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7704 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7705 } 7706 7707 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7708 const OperandVector &Operands) { 7709 for (unsigned I = 1; I < Operands.size(); ++I) { 7710 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7711 if (Operand.isReg()) 7712 Operand.addRegOperands(Inst, 1); 7713 } 7714 7715 Inst.addOperand(MCOperand::createImm(1)); // a16 7716 } 7717 7718 //===----------------------------------------------------------------------===// 7719 // smrd 7720 //===----------------------------------------------------------------------===// 7721 7722 bool AMDGPUOperand::isSMRDOffset8() const { 7723 return isImm() && isUInt<8>(getImm()); 7724 } 7725 7726 bool AMDGPUOperand::isSMEMOffset() const { 7727 return isImm(); // Offset range is checked later by validator. 7728 } 7729 7730 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7731 // 32-bit literals are only supported on CI and we only want to use them 7732 // when the offset is > 8-bits. 
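// (Illustrative: an offset such as 0x100 does not fit into 8 bits and therefore
// uses the 32-bit literal form, whereas 0xFF still qualifies as an 8-bit offset.)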
7733 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7734 } 7735 7736 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7737 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7738 } 7739 7740 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7741 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7742 } 7743 7744 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7745 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7746 } 7747 7748 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7749 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7750 } 7751 7752 //===----------------------------------------------------------------------===// 7753 // vop3 7754 //===----------------------------------------------------------------------===// 7755 7756 static bool ConvertOmodMul(int64_t &Mul) { 7757 if (Mul != 1 && Mul != 2 && Mul != 4) 7758 return false; 7759 7760 Mul >>= 1; 7761 return true; 7762 } 7763 7764 static bool ConvertOmodDiv(int64_t &Div) { 7765 if (Div == 1) { 7766 Div = 0; 7767 return true; 7768 } 7769 7770 if (Div == 2) { 7771 Div = 3; 7772 return true; 7773 } 7774 7775 return false; 7776 } 7777 7778 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7779 // This is intentional and ensures compatibility with sp3. 7780 // See bug 35397 for details. 7781 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7782 if (BoundCtrl == 0 || BoundCtrl == 1) { 7783 BoundCtrl = 1; 7784 return true; 7785 } 7786 return false; 7787 } 7788 7789 // Note: the order in this table matches the order of operands in AsmString. 7790 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7791 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7792 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7793 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7794 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7795 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7796 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7797 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7798 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7799 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7800 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7801 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7802 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7803 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7804 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7805 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7806 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7807 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7808 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7809 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7810 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7811 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7812 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7813 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7814 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7815 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7816 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7817 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7818 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7819 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7820 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7821 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7822 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7823 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7824 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7825 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7826 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7827 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7828 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7829 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7830 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7831 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7832 }; 7833 7834 void AMDGPUAsmParser::onBeginOfFile() { 7835 if (!getParser().getStreamer().getTargetStreamer() || 7836 getSTI().getTargetTriple().getArch() == Triple::r600) 7837 return; 7838 7839 if (!getTargetStreamer().getTargetID()) 7840 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7841 7842 if (isHsaAbiVersion3AndAbove(&getSTI())) 7843 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7844 } 7845 7846 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7847 7848 OperandMatchResultTy res = parseOptionalOpr(Operands); 7849 7850 // This is a hack to enable hardcoded mandatory operands which follow 7851 // optional operands. 7852 // 7853 // The current design assumes that all operands after the first optional operand 7854 // are also optional. However, the implementation of some instructions violates 7855 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7856 // 7857 // To alleviate this problem, we have to (implicitly) parse extra operands 7858 // to make sure the autogenerated parser of custom operands never hits hardcoded 7859 // mandatory operands.
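// For illustration (assumed example): for a flat/global atomic written as
// "... offset:16 glc", the lookahead loop below keeps parsing past "offset:16"
// so that the hardcoded "glc" following the optional operand is consumed here
// as well, instead of confusing the autogenerated matcher.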
7860 7861 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7862 if (res != MatchOperand_Success || 7863 isToken(AsmToken::EndOfStatement)) 7864 break; 7865 7866 trySkipToken(AsmToken::Comma); 7867 res = parseOptionalOpr(Operands); 7868 } 7869 7870 return res; 7871 } 7872 7873 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7874 OperandMatchResultTy res; 7875 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7876 // try to parse any optional operand here 7877 if (Op.IsBit) { 7878 res = parseNamedBit(Op.Name, Operands, Op.Type); 7879 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7880 res = parseOModOperand(Operands); 7881 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7882 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7883 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7884 res = parseSDWASel(Operands, Op.Name, Op.Type); 7885 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7886 res = parseSDWADstUnused(Operands); 7887 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7888 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7889 Op.Type == AMDGPUOperand::ImmTyNegLo || 7890 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7891 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7892 Op.ConvertResult); 7893 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7894 res = parseDim(Operands); 7895 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7896 res = parseCPol(Operands); 7897 } else { 7898 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7899 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7900 res = parseOperandArrayWithPrefix("neg", Operands, 7901 AMDGPUOperand::ImmTyBLGP, 7902 nullptr); 7903 } 7904 } 7905 if (res != MatchOperand_NoMatch) { 7906 return res; 7907 } 7908 } 7909 return MatchOperand_NoMatch; 7910 } 7911 7912 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7913 StringRef Name = getTokenStr(); 7914 if (Name == "mul") { 7915 return parseIntWithPrefix("mul", Operands, 7916 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7917 } 7918 7919 if (Name == "div") { 7920 return parseIntWithPrefix("div", Operands, 7921 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7922 } 7923 7924 return MatchOperand_NoMatch; 7925 } 7926 7927 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7928 cvtVOP3P(Inst, Operands); 7929 7930 int Opc = Inst.getOpcode(); 7931 7932 int SrcNum; 7933 const int Ops[] = { AMDGPU::OpName::src0, 7934 AMDGPU::OpName::src1, 7935 AMDGPU::OpName::src2 }; 7936 for (SrcNum = 0; 7937 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7938 ++SrcNum); 7939 assert(SrcNum > 0); 7940 7941 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7942 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7943 7944 if ((OpSel & (1 << SrcNum)) != 0) { 7945 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7946 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7947 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7948 } 7949 } 7950 7951 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7952 // 1. This operand is input modifiers 7953 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7954 // 2. This is not last operand 7955 && Desc.NumOperands > (OpNum + 1) 7956 // 3. Next operand is register class 7957 && Desc.OpInfo[OpNum + 1].RegClass != -1 7958 // 4. 
Next register is not tied to any other operand 7959 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7960 } 7961 7962 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7963 { 7964 OptionalImmIndexMap OptionalIdx; 7965 unsigned Opc = Inst.getOpcode(); 7966 7967 unsigned I = 1; 7968 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7969 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7970 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7971 } 7972 7973 for (unsigned E = Operands.size(); I != E; ++I) { 7974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7975 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7976 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7977 } else if (Op.isInterpSlot() || 7978 Op.isInterpAttr() || 7979 Op.isAttrChan()) { 7980 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7981 } else if (Op.isImmModifier()) { 7982 OptionalIdx[Op.getImmTy()] = I; 7983 } else { 7984 llvm_unreachable("unhandled operand type"); 7985 } 7986 } 7987 7988 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7989 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7990 } 7991 7992 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7994 } 7995 7996 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7997 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7998 } 7999 } 8000 8001 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 8002 OptionalImmIndexMap &OptionalIdx) { 8003 unsigned Opc = Inst.getOpcode(); 8004 8005 unsigned I = 1; 8006 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8007 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8008 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8009 } 8010 8011 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 8012 // This instruction has src modifiers 8013 for (unsigned E = Operands.size(); I != E; ++I) { 8014 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8015 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8016 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 8017 } else if (Op.isImmModifier()) { 8018 OptionalIdx[Op.getImmTy()] = I; 8019 } else if (Op.isRegOrImm()) { 8020 Op.addRegOrImmOperands(Inst, 1); 8021 } else { 8022 llvm_unreachable("unhandled operand type"); 8023 } 8024 } 8025 } else { 8026 // No src modifiers 8027 for (unsigned E = Operands.size(); I != E; ++I) { 8028 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8029 if (Op.isMod()) { 8030 OptionalIdx[Op.getImmTy()] = I; 8031 } else { 8032 Op.addRegOrImmOperands(Inst, 1); 8033 } 8034 } 8035 } 8036 8037 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 8038 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 8039 } 8040 8041 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 8042 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 8043 } 8044 8045 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 8046 // it has src2 register operand that is tied to dst operand 8047 // we don't allow modifiers for this operand in assembler so src2_modifiers 8048 // should be 0. 
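// (Illustrative: for "v_mac_f32_e64 v0, v1, v2", the code below inserts an
// immediate 0 into the src2_modifiers slot and then appends a copy of the
// dst operand v0 as the tied src2.)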
8049 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 8050 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 8051 Opc == AMDGPU::V_MAC_F32_e64_vi || 8052 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 8053 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 8054 Opc == AMDGPU::V_MAC_F16_e64_vi || 8055 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 8056 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 8057 Opc == AMDGPU::V_FMAC_F32_e64_vi || 8058 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 8059 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 8060 auto it = Inst.begin(); 8061 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 8062 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 8063 ++it; 8064 // Copy the operand to ensure it's not invalidated when Inst grows. 8065 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 8066 } 8067 } 8068 8069 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 8070 OptionalImmIndexMap OptionalIdx; 8071 cvtVOP3(Inst, Operands, OptionalIdx); 8072 } 8073 8074 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 8075 OptionalImmIndexMap &OptIdx) { 8076 const int Opc = Inst.getOpcode(); 8077 const MCInstrDesc &Desc = MII.get(Opc); 8078 8079 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 8080 8081 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 8082 assert(!IsPacked); 8083 Inst.addOperand(Inst.getOperand(0)); 8084 } 8085 8086 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 8087 // instruction, and then figure out where to actually put the modifiers 8088 8089 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 8090 if (OpSelIdx != -1) { 8091 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 8092 } 8093 8094 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 8095 if (OpSelHiIdx != -1) { 8096 int DefaultVal = IsPacked ? 
-1 : 0; 8097 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 8098 DefaultVal); 8099 } 8100 8101 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 8102 if (NegLoIdx != -1) { 8103 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 8104 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 8105 } 8106 8107 const int Ops[] = { AMDGPU::OpName::src0, 8108 AMDGPU::OpName::src1, 8109 AMDGPU::OpName::src2 }; 8110 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 8111 AMDGPU::OpName::src1_modifiers, 8112 AMDGPU::OpName::src2_modifiers }; 8113 8114 unsigned OpSel = 0; 8115 unsigned OpSelHi = 0; 8116 unsigned NegLo = 0; 8117 unsigned NegHi = 0; 8118 8119 if (OpSelIdx != -1) 8120 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8121 8122 if (OpSelHiIdx != -1) 8123 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8124 8125 if (NegLoIdx != -1) { 8126 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8127 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8128 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8129 } 8130 8131 for (int J = 0; J < 3; ++J) { 8132 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8133 if (OpIdx == -1) 8134 break; 8135 8136 uint32_t ModVal = 0; 8137 8138 if ((OpSel & (1 << J)) != 0) 8139 ModVal |= SISrcMods::OP_SEL_0; 8140 8141 if ((OpSelHi & (1 << J)) != 0) 8142 ModVal |= SISrcMods::OP_SEL_1; 8143 8144 if ((NegLo & (1 << J)) != 0) 8145 ModVal |= SISrcMods::NEG; 8146 8147 if ((NegHi & (1 << J)) != 0) 8148 ModVal |= SISrcMods::NEG_HI; 8149 8150 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8151 8152 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8153 } 8154 } 8155 8156 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8157 OptionalImmIndexMap OptIdx; 8158 cvtVOP3(Inst, Operands, OptIdx); 8159 cvtVOP3P(Inst, Operands, OptIdx); 8160 } 8161 8162 //===----------------------------------------------------------------------===// 8163 // dpp 8164 //===----------------------------------------------------------------------===// 8165 8166 bool AMDGPUOperand::isDPP8() const { 8167 return isImmTy(ImmTyDPP8); 8168 } 8169 8170 bool AMDGPUOperand::isDPPCtrl() const { 8171 using namespace AMDGPU::DPP; 8172 8173 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8174 if (result) { 8175 int64_t Imm = getImm(); 8176 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8177 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8178 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8179 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8180 (Imm == DppCtrl::WAVE_SHL1) || 8181 (Imm == DppCtrl::WAVE_ROL1) || 8182 (Imm == DppCtrl::WAVE_SHR1) || 8183 (Imm == DppCtrl::WAVE_ROR1) || 8184 (Imm == DppCtrl::ROW_MIRROR) || 8185 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8186 (Imm == DppCtrl::BCAST15) || 8187 (Imm == DppCtrl::BCAST31) || 8188 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8189 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8190 } 8191 return false; 8192 } 8193 8194 //===----------------------------------------------------------------------===// 8195 // mAI 8196 //===----------------------------------------------------------------------===// 8197 8198 bool AMDGPUOperand::isBLGP() const { 8199 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8200 } 8201 8202 bool 
AMDGPUOperand::isCBSZ() const { 8203 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8204 } 8205 8206 bool AMDGPUOperand::isABID() const { 8207 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8208 } 8209 8210 bool AMDGPUOperand::isS16Imm() const { 8211 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8212 } 8213 8214 bool AMDGPUOperand::isU16Imm() const { 8215 return isImm() && isUInt<16>(getImm()); 8216 } 8217 8218 //===----------------------------------------------------------------------===// 8219 // dim 8220 //===----------------------------------------------------------------------===// 8221 8222 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8223 // We want to allow "dim:1D" etc., 8224 // but the initial 1 is tokenized as an integer. 8225 std::string Token; 8226 if (isToken(AsmToken::Integer)) { 8227 SMLoc Loc = getToken().getEndLoc(); 8228 Token = std::string(getTokenStr()); 8229 lex(); 8230 if (getLoc() != Loc) 8231 return false; 8232 } 8233 8234 StringRef Suffix; 8235 if (!parseId(Suffix)) 8236 return false; 8237 Token += Suffix; 8238 8239 StringRef DimId = Token; 8240 if (DimId.startswith("SQ_RSRC_IMG_")) 8241 DimId = DimId.drop_front(12); 8242 8243 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8244 if (!DimInfo) 8245 return false; 8246 8247 Encoding = DimInfo->Encoding; 8248 return true; 8249 } 8250 8251 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8252 if (!isGFX10Plus()) 8253 return MatchOperand_NoMatch; 8254 8255 SMLoc S = getLoc(); 8256 8257 if (!trySkipId("dim", AsmToken::Colon)) 8258 return MatchOperand_NoMatch; 8259 8260 unsigned Encoding; 8261 SMLoc Loc = getLoc(); 8262 if (!parseDimId(Encoding)) { 8263 Error(Loc, "invalid dim value"); 8264 return MatchOperand_ParseFail; 8265 } 8266 8267 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8268 AMDGPUOperand::ImmTyDim)); 8269 return MatchOperand_Success; 8270 } 8271 8272 //===----------------------------------------------------------------------===// 8273 // dpp 8274 //===----------------------------------------------------------------------===// 8275 8276 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8277 SMLoc S = getLoc(); 8278 8279 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8280 return MatchOperand_NoMatch; 8281 8282 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8283 8284 int64_t Sels[8]; 8285 8286 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8287 return MatchOperand_ParseFail; 8288 8289 for (size_t i = 0; i < 8; ++i) { 8290 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8291 return MatchOperand_ParseFail; 8292 8293 SMLoc Loc = getLoc(); 8294 if (getParser().parseAbsoluteExpression(Sels[i])) 8295 return MatchOperand_ParseFail; 8296 if (0 > Sels[i] || 7 < Sels[i]) { 8297 Error(Loc, "expected a 3-bit value"); 8298 return MatchOperand_ParseFail; 8299 } 8300 } 8301 8302 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8303 return MatchOperand_ParseFail; 8304 8305 unsigned DPP8 = 0; 8306 for (size_t i = 0; i < 8; ++i) 8307 DPP8 |= (Sels[i] << (i * 3)); 8308 8309 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8310 return MatchOperand_Success; 8311 } 8312 8313 bool 8314 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8315 const OperandVector &Operands) { 8316 if (Ctrl == "row_newbcast") 8317 return isGFX90A(); 8318 8319 if (Ctrl == "row_share" || 8320 Ctrl 
== "row_xmask") 8321 return isGFX10Plus(); 8322 8323 if (Ctrl == "wave_shl" || 8324 Ctrl == "wave_shr" || 8325 Ctrl == "wave_rol" || 8326 Ctrl == "wave_ror" || 8327 Ctrl == "row_bcast") 8328 return isVI() || isGFX9(); 8329 8330 return Ctrl == "row_mirror" || 8331 Ctrl == "row_half_mirror" || 8332 Ctrl == "quad_perm" || 8333 Ctrl == "row_shl" || 8334 Ctrl == "row_shr" || 8335 Ctrl == "row_ror"; 8336 } 8337 8338 int64_t 8339 AMDGPUAsmParser::parseDPPCtrlPerm() { 8340 // quad_perm:[%d,%d,%d,%d] 8341 8342 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8343 return -1; 8344 8345 int64_t Val = 0; 8346 for (int i = 0; i < 4; ++i) { 8347 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8348 return -1; 8349 8350 int64_t Temp; 8351 SMLoc Loc = getLoc(); 8352 if (getParser().parseAbsoluteExpression(Temp)) 8353 return -1; 8354 if (Temp < 0 || Temp > 3) { 8355 Error(Loc, "expected a 2-bit value"); 8356 return -1; 8357 } 8358 8359 Val += (Temp << i * 2); 8360 } 8361 8362 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8363 return -1; 8364 8365 return Val; 8366 } 8367 8368 int64_t 8369 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8370 using namespace AMDGPU::DPP; 8371 8372 // sel:%d 8373 8374 int64_t Val; 8375 SMLoc Loc = getLoc(); 8376 8377 if (getParser().parseAbsoluteExpression(Val)) 8378 return -1; 8379 8380 struct DppCtrlCheck { 8381 int64_t Ctrl; 8382 int Lo; 8383 int Hi; 8384 }; 8385 8386 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8387 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8388 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8389 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8390 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8391 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8392 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8393 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8394 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8395 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8396 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8397 .Default({-1, 0, 0}); 8398 8399 bool Valid; 8400 if (Check.Ctrl == -1) { 8401 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8402 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8403 } else { 8404 Valid = Check.Lo <= Val && Val <= Check.Hi; 8405 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8406 } 8407 8408 if (!Valid) { 8409 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8410 return -1; 8411 } 8412 8413 return Val; 8414 } 8415 8416 OperandMatchResultTy 8417 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8418 using namespace AMDGPU::DPP; 8419 8420 if (!isToken(AsmToken::Identifier) || 8421 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8422 return MatchOperand_NoMatch; 8423 8424 SMLoc S = getLoc(); 8425 int64_t Val = -1; 8426 StringRef Ctrl; 8427 8428 parseId(Ctrl); 8429 8430 if (Ctrl == "row_mirror") { 8431 Val = DppCtrl::ROW_MIRROR; 8432 } else if (Ctrl == "row_half_mirror") { 8433 Val = DppCtrl::ROW_HALF_MIRROR; 8434 } else { 8435 if (skipToken(AsmToken::Colon, "expected a colon")) { 8436 if (Ctrl == "quad_perm") { 8437 Val = parseDPPCtrlPerm(); 8438 } else { 8439 Val = parseDPPCtrlSel(Ctrl); 8440 } 8441 } 8442 } 8443 8444 if (Val == -1) 8445 return MatchOperand_ParseFail; 8446 8447 Operands.push_back( 8448 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8449 return MatchOperand_Success; 8450 } 8451 8452 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8453 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8454 } 8455 8456 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8457 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8458 } 8459 8460 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8461 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8462 } 8463 8464 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8465 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8466 } 8467 8468 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8469 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8470 } 8471 8472 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8473 OptionalImmIndexMap OptionalIdx; 8474 8475 unsigned Opc = Inst.getOpcode(); 8476 bool HasModifiers = 8477 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8478 unsigned I = 1; 8479 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8480 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8481 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8482 } 8483 8484 int Fi = 0; 8485 for (unsigned E = Operands.size(); I != E; ++I) { 8486 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8487 MCOI::TIED_TO); 8488 if (TiedTo != -1) { 8489 assert((unsigned)TiedTo < Inst.getNumOperands()); 8490 // handle tied old or src2 for MAC instructions 8491 Inst.addOperand(Inst.getOperand(TiedTo)); 8492 } 8493 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8494 // Add the register arguments 8495 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8496 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8497 // Skip it. 
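// (Illustrative: for the VOP2b forms mentioned above, the source text names
// "vcc" explicitly, e.g. "v_add_u32 v0, vcc, v1, v2 ...", but there is no
// corresponding DPP MCInst operand, so it is dropped here.)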
8498 continue; 8499 } 8500 8501 if (IsDPP8) { 8502 if (Op.isDPP8()) { 8503 Op.addImmOperands(Inst, 1); 8504 } else if (HasModifiers && 8505 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8506 Op.addRegWithFPInputModsOperands(Inst, 2); 8507 } else if (Op.isFI()) { 8508 Fi = Op.getImm(); 8509 } else if (Op.isReg()) { 8510 Op.addRegOperands(Inst, 1); 8511 } else { 8512 llvm_unreachable("Invalid operand type"); 8513 } 8514 } else { 8515 if (HasModifiers && 8516 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8517 Op.addRegWithFPInputModsOperands(Inst, 2); 8518 } else if (Op.isReg()) { 8519 Op.addRegOperands(Inst, 1); 8520 } else if (Op.isDPPCtrl()) { 8521 Op.addImmOperands(Inst, 1); 8522 } else if (Op.isImm()) { 8523 // Handle optional arguments 8524 OptionalIdx[Op.getImmTy()] = I; 8525 } else { 8526 llvm_unreachable("Invalid operand type"); 8527 } 8528 } 8529 } 8530 8531 if (IsDPP8) { 8532 using namespace llvm::AMDGPU::DPP; 8533 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8534 } else { 8535 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8536 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8537 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8538 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8539 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8540 } 8541 } 8542 } 8543 8544 //===----------------------------------------------------------------------===// 8545 // sdwa 8546 //===----------------------------------------------------------------------===// 8547 8548 OperandMatchResultTy 8549 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8550 AMDGPUOperand::ImmTy Type) { 8551 using namespace llvm::AMDGPU::SDWA; 8552 8553 SMLoc S = getLoc(); 8554 StringRef Value; 8555 OperandMatchResultTy res; 8556 8557 SMLoc StringLoc; 8558 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8559 if (res != MatchOperand_Success) { 8560 return res; 8561 } 8562 8563 int64_t Int; 8564 Int = StringSwitch<int64_t>(Value) 8565 .Case("BYTE_0", SdwaSel::BYTE_0) 8566 .Case("BYTE_1", SdwaSel::BYTE_1) 8567 .Case("BYTE_2", SdwaSel::BYTE_2) 8568 .Case("BYTE_3", SdwaSel::BYTE_3) 8569 .Case("WORD_0", SdwaSel::WORD_0) 8570 .Case("WORD_1", SdwaSel::WORD_1) 8571 .Case("DWORD", SdwaSel::DWORD) 8572 .Default(0xffffffff); 8573 8574 if (Int == 0xffffffff) { 8575 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8576 return MatchOperand_ParseFail; 8577 } 8578 8579 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8580 return MatchOperand_Success; 8581 } 8582 8583 OperandMatchResultTy 8584 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8585 using namespace llvm::AMDGPU::SDWA; 8586 8587 SMLoc S = getLoc(); 8588 StringRef Value; 8589 OperandMatchResultTy res; 8590 8591 SMLoc StringLoc; 8592 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8593 if (res != MatchOperand_Success) { 8594 return res; 8595 } 8596 8597 int64_t Int; 8598 Int = StringSwitch<int64_t>(Value) 8599 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8600 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8601 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8602 .Default(0xffffffff); 8603 8604 if (Int == 0xffffffff) { 8605 Error(StringLoc, "invalid dst_unused value"); 8606 return MatchOperand_ParseFail; 8607 } 8608 8609 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8610 return MatchOperand_Success; 8611 } 8612 8613 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8614 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8615 } 8616 8617 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8618 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8619 } 8620 8621 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8622 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8623 } 8624 8625 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8626 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8627 } 8628 8629 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8630 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8631 } 8632 8633 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8634 uint64_t BasicInstType, 8635 bool SkipDstVcc, 8636 bool SkipSrcVcc) { 8637 using namespace llvm::AMDGPU::SDWA; 8638 8639 OptionalImmIndexMap OptionalIdx; 8640 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8641 bool SkippedVcc = false; 8642 8643 unsigned I = 1; 8644 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8645 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8646 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8647 } 8648 8649 for (unsigned E = Operands.size(); I != E; ++I) { 8650 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8651 if (SkipVcc && !SkippedVcc && Op.isReg() && 8652 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8653 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8654 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8655 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8656 // Skip VCC only if we didn't skip it on previous iteration. 8657 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
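// (Illustrative: for "v_addc_u32_sdwa v1, vcc, v2, v3, vcc ...", the dst "vcc"
// is reached when only the vdst has been added (1 MC operand), and the trailing
// source "vcc" once vdst plus src0/src1 with their modifiers have been added
// (5 MC operands), matching the two checks below.)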
8658 if (BasicInstType == SIInstrFlags::VOP2 && 8659 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8660 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8661 SkippedVcc = true; 8662 continue; 8663 } else if (BasicInstType == SIInstrFlags::VOPC && 8664 Inst.getNumOperands() == 0) { 8665 SkippedVcc = true; 8666 continue; 8667 } 8668 } 8669 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8670 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8671 } else if (Op.isImm()) { 8672 // Handle optional arguments 8673 OptionalIdx[Op.getImmTy()] = I; 8674 } else { 8675 llvm_unreachable("Invalid operand type"); 8676 } 8677 SkippedVcc = false; 8678 } 8679 8680 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8681 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8682 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8683 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8684 switch (BasicInstType) { 8685 case SIInstrFlags::VOP1: 8686 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8687 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8688 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8689 } 8690 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8691 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8693 break; 8694 8695 case SIInstrFlags::VOP2: 8696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8697 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8698 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8699 } 8700 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8701 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8702 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8703 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8704 break; 8705 8706 case SIInstrFlags::VOPC: 8707 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8708 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8709 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8710 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8711 break; 8712 8713 default: 8714 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8715 } 8716 } 8717 8718 // special case v_mac_{f16, f32}: 8719 // it has src2 register operand that is tied to dst operand 8720 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8721 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8722 auto it = Inst.begin(); 8723 std::advance( 8724 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8725 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8726 } 8727 } 8728 8729 //===----------------------------------------------------------------------===// 8730 // mAI 8731 //===----------------------------------------------------------------------===// 8732 8733 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8734 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8735 } 8736 8737 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8738 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8739 } 8740 8741 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8742 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8743 } 8744 8745 /// Force static initialization. 8746 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8747 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8748 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8749 } 8750 8751 #define GET_REGISTER_MATCHER 8752 #define GET_MATCHER_IMPLEMENTATION 8753 #define GET_MNEMONIC_SPELL_CHECKER 8754 #define GET_MNEMONIC_CHECKER 8755 #include "AMDGPUGenAsmMatcher.inc" 8756 8757 // This function should be defined after auto-generated include so that we have 8758 // MatchClassKind enum defined 8759 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8760 unsigned Kind) { 8761 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8762 // But MatchInstructionImpl() expects to meet token and fails to validate 8763 // operand. This method checks if we are given immediate operand but expect to 8764 // get corresponding token. 8765 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8766 switch (Kind) { 8767 case MCK_addr64: 8768 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8769 case MCK_gds: 8770 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8771 case MCK_lds: 8772 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8773 case MCK_idxen: 8774 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8775 case MCK_offen: 8776 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8777 case MCK_SSrcB32: 8778 // When operands have expression values, they will return true for isToken, 8779 // because it is not possible to distinguish between a token and an 8780 // expression at parse time. MatchInstructionImpl() will always try to 8781 // match an operand as a token, when isToken returns true, and when the 8782 // name of the expression is not a valid token, the match will fail, 8783 // so we need to handle it here. 8784 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8785 case MCK_SSrcF32: 8786 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8787 case MCK_SoppBrTarget: 8788 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8789 case MCK_VReg32OrOff: 8790 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8791 case MCK_InterpSlot: 8792 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8793 case MCK_Attr: 8794 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8795 case MCK_AttrChan: 8796 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8797 case MCK_ImmSMEMOffset: 8798 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8799 case MCK_SReg_64: 8800 case MCK_SReg_64_XEXEC: 8801 // Null is defined as a 32-bit register but 8802 // it should also be enabled with 64-bit operands. 8803 // The following code enables it for SReg_64 operands 8804 // used as source and destination. Remaining source 8805 // operands are handled in isInlinableImm. 8806 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8807 default: 8808 return Match_InvalidOperand; 8809 } 8810 } 8811 8812 //===----------------------------------------------------------------------===// 8813 // endpgm 8814 //===----------------------------------------------------------------------===// 8815 8816 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8817 SMLoc S = getLoc(); 8818 int64_t Imm = 0; 8819 8820 if (!parseExpr(Imm)) { 8821 // The operand is optional, if not present default to 0 8822 Imm = 0; 8823 } 8824 8825 if (!isUInt<16>(Imm)) { 8826 Error(S, "expected a 16-bit value"); 8827 return MatchOperand_ParseFail; 8828 } 8829 8830 Operands.push_back( 8831 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8832 return MatchOperand_Success; 8833 } 8834 8835 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8836