//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ?
          SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool
  isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() ||
           isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

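  // Note: the is*Src* predicates in this class mirror the operand classes
  // defined in TableGen. The generated matcher (AMDGPUGenAsmMatcher.inc,
  // included below in AMDGPUAsmParser) calls them to decide whether a parsed
  // operand can satisfy a given instruction operand: a register of the right
  // class, an inlinable constant, or a literal of the right width.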
  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return
        isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return
        StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static
  void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
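
  // Note: operands are built through these static factories while parsing;
  // for example, a parsed literal typically reaches the operand list as
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S));
  // and a resolved register goes through CreateReg() with its register number
  // and source range.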

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
            Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
            Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }
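
  // Note: usesRegister() below is called as registers are parsed within a
  // kernel scope; it keeps the .kernel.sgpr_count / .kernel.vgpr_count /
  // .kernel.agpr_count symbols in sync with the highest register index
  // referenced so far (plus one).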

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool
                                                    AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool
       validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const
                           StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const
OperandVector &Operands, 1737 uint64_t BasicInstType, 1738 bool SkipDstVcc = false, 1739 bool SkipSrcVcc = false); 1740 1741 AMDGPUOperand::Ptr defaultBLGP() const; 1742 AMDGPUOperand::Ptr defaultCBSZ() const; 1743 AMDGPUOperand::Ptr defaultABID() const; 1744 1745 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1746 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1747 }; 1748 1749 struct OptionalOperand { 1750 const char *Name; 1751 AMDGPUOperand::ImmTy Type; 1752 bool IsBit; 1753 bool (*ConvertResult)(int64_t&); 1754 }; 1755 1756 } // end anonymous namespace 1757 1758 // May be called with integer type with equivalent bitwidth. 1759 static const fltSemantics *getFltSemantics(unsigned Size) { 1760 switch (Size) { 1761 case 4: 1762 return &APFloat::IEEEsingle(); 1763 case 8: 1764 return &APFloat::IEEEdouble(); 1765 case 2: 1766 return &APFloat::IEEEhalf(); 1767 default: 1768 llvm_unreachable("unsupported fp type"); 1769 } 1770 } 1771 1772 static const fltSemantics *getFltSemantics(MVT VT) { 1773 return getFltSemantics(VT.getSizeInBits() / 8); 1774 } 1775 1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1777 switch (OperandType) { 1778 case AMDGPU::OPERAND_REG_IMM_INT32: 1779 case AMDGPU::OPERAND_REG_IMM_FP32: 1780 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1781 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1782 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1783 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1785 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1786 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1787 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1788 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1789 case AMDGPU::OPERAND_KIMM32: 1790 return &APFloat::IEEEsingle(); 1791 case AMDGPU::OPERAND_REG_IMM_INT64: 1792 case AMDGPU::OPERAND_REG_IMM_FP64: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1796 return &APFloat::IEEEdouble(); 1797 case AMDGPU::OPERAND_REG_IMM_INT16: 1798 case AMDGPU::OPERAND_REG_IMM_FP16: 1799 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1800 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1809 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1810 case AMDGPU::OPERAND_KIMM16: 1811 return &APFloat::IEEEhalf(); 1812 default: 1813 llvm_unreachable("unsupported fp type"); 1814 } 1815 } 1816 1817 //===----------------------------------------------------------------------===// 1818 // Operand 1819 //===----------------------------------------------------------------------===// 1820 1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1822 bool Lost; 1823 1824 // Convert literal to single precision 1825 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1826 APFloat::rmNearestTiesToEven, 1827 &Lost); 1828 // We allow precision lost but not overflow or underflow 1829 if (Status != APFloat::opOK && 1830 Lost && 1831 ((Status & APFloat::opOverflow) != 0 || 1832 (Status & APFloat::opUnderflow) != 0)) { 1833 return false; 1834 } 1835 1836 return true; 1837 } 1838 1839 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1840 return isUIntN(Size, 
Val) || isIntN(Size, Val); 1841 } 1842 1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1844 if (VT.getScalarType() == MVT::i16) { 1845 // FP immediate values are broken. 1846 return isInlinableIntLiteral(Val); 1847 } 1848 1849 // f16/v2f16 operands work correctly for all values. 1850 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1851 } 1852 1853 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1854 1855 // This is a hack to enable named inline values like 1856 // shared_base with both 32-bit and 64-bit operands. 1857 // Note that these values are defined as 1858 // 32-bit operands only. 1859 if (isInlineValue()) { 1860 return true; 1861 } 1862 1863 if (!isImmTy(ImmTyNone)) { 1864 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1865 return false; 1866 } 1867 // TODO: We should avoid using host float here. It would be better to 1868 // check the float bit values which is what a few other places do. 1869 // We've had bot failures before due to weird NaN support on mips hosts. 1870 1871 APInt Literal(64, Imm.Val); 1872 1873 if (Imm.IsFPImm) { // We got fp literal token 1874 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1875 return AMDGPU::isInlinableLiteral64(Imm.Val, 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1880 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1881 return false; 1882 1883 if (type.getScalarSizeInBits() == 16) { 1884 return isInlineableLiteralOp16( 1885 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1886 type, AsmParser->hasInv2PiInlineImm()); 1887 } 1888 1889 // Check if single precision literal is inlinable 1890 return AMDGPU::isInlinableLiteral32( 1891 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1892 AsmParser->hasInv2PiInlineImm()); 1893 } 1894 1895 // We got int literal token. 1896 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1897 return AMDGPU::isInlinableLiteral64(Imm.Val, 1898 AsmParser->hasInv2PiInlineImm()); 1899 } 1900 1901 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1902 return false; 1903 } 1904 1905 if (type.getScalarSizeInBits() == 16) { 1906 return isInlineableLiteralOp16( 1907 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1908 type, AsmParser->hasInv2PiInlineImm()); 1909 } 1910 1911 return AMDGPU::isInlinableLiteral32( 1912 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1913 AsmParser->hasInv2PiInlineImm()); 1914 } 1915 1916 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1917 // Check that this immediate can be added as literal 1918 if (!isImmTy(ImmTyNone)) { 1919 return false; 1920 } 1921 1922 if (!Imm.IsFPImm) { 1923 // We got int literal token. 1924 1925 if (type == MVT::f64 && hasFPModifiers()) { 1926 // Cannot apply fp modifiers to int literals preserving the same semantics 1927 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1928 // disable these cases. 1929 return false; 1930 } 1931 1932 unsigned Size = type.getSizeInBits(); 1933 if (Size == 64) 1934 Size = 32; 1935 1936 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1937 // types. 
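    // For illustration: with a 32-bit operand both 0xffffffff and -1 are
    // accepted here, because isSafeTruncation() allows either an unsigned or
    // a signed fit (and for 64-bit operands only the low 32 bits are checked,
    // per the FIXME above).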
1938 return isSafeTruncation(Imm.Val, Size); 1939 } 1940 1941 // We got fp literal token 1942 if (type == MVT::f64) { // Expected 64-bit fp operand 1943 // We would set low 64-bits of literal to zeroes but we accept this literals 1944 return true; 1945 } 1946 1947 if (type == MVT::i64) { // Expected 64-bit int operand 1948 // We don't allow fp literals in 64-bit integer instructions. It is 1949 // unclear how we should encode them. 1950 return false; 1951 } 1952 1953 // We allow fp literals with f16x2 operands assuming that the specified 1954 // literal goes into the lower half and the upper half is zero. We also 1955 // require that the literal may be losslessly converted to f16. 1956 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1957 (type == MVT::v2i16)? MVT::i16 : 1958 (type == MVT::v2f32)? MVT::f32 : type; 1959 1960 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1961 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1962 } 1963 1964 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1965 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1966 } 1967 1968 bool AMDGPUOperand::isVRegWithInputMods() const { 1969 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1970 // GFX90A allows DPP on 64-bit operands. 1971 (isRegClass(AMDGPU::VReg_64RegClassID) && 1972 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1973 } 1974 1975 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1976 if (AsmParser->isVI()) 1977 return isVReg32(); 1978 else if (AsmParser->isGFX9Plus()) 1979 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1980 else 1981 return false; 1982 } 1983 1984 bool AMDGPUOperand::isSDWAFP16Operand() const { 1985 return isSDWAOperand(MVT::f16); 1986 } 1987 1988 bool AMDGPUOperand::isSDWAFP32Operand() const { 1989 return isSDWAOperand(MVT::f32); 1990 } 1991 1992 bool AMDGPUOperand::isSDWAInt16Operand() const { 1993 return isSDWAOperand(MVT::i16); 1994 } 1995 1996 bool AMDGPUOperand::isSDWAInt32Operand() const { 1997 return isSDWAOperand(MVT::i32); 1998 } 1999 2000 bool AMDGPUOperand::isBoolReg() const { 2001 auto FB = AsmParser->getFeatureBits(); 2002 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 2003 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 2004 } 2005 2006 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2007 { 2008 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2009 assert(Size == 2 || Size == 4 || Size == 8); 2010 2011 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2012 2013 if (Imm.Mods.Abs) { 2014 Val &= ~FpSignMask; 2015 } 2016 if (Imm.Mods.Neg) { 2017 Val ^= FpSignMask; 2018 } 2019 2020 return Val; 2021 } 2022 2023 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2024 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2025 Inst.getNumOperands())) { 2026 addLiteralImmOperand(Inst, Imm.Val, 2027 ApplyModifiers & 2028 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2029 } else { 2030 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2031 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2032 setImmKindNone(); 2033 } 2034 } 2035 2036 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2037 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2038 auto OpNum = Inst.getNumOperands(); 2039 // Check that this operand accepts literals 2040 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2041 2042 
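  // If requested, fold the fp modifiers (abs/neg) into the literal's sign bit
  // before the value is range-checked and encoded below.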
if (ApplyModifiers) { 2043 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2044 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2045 Val = applyInputFPModifiers(Val, Size); 2046 } 2047 2048 APInt Literal(64, Val); 2049 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2050 2051 if (Imm.IsFPImm) { // We got fp literal token 2052 switch (OpTy) { 2053 case AMDGPU::OPERAND_REG_IMM_INT64: 2054 case AMDGPU::OPERAND_REG_IMM_FP64: 2055 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2056 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2057 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2058 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2059 AsmParser->hasInv2PiInlineImm())) { 2060 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2061 setImmKindConst(); 2062 return; 2063 } 2064 2065 // Non-inlineable 2066 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2067 // For fp operands we check if low 32 bits are zeros 2068 if (Literal.getLoBits(32) != 0) { 2069 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2070 "Can't encode literal as exact 64-bit floating-point operand. " 2071 "Low 32-bits will be set to zero"); 2072 } 2073 2074 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2075 setImmKindLiteral(); 2076 return; 2077 } 2078 2079 // We don't allow fp literals in 64-bit integer instructions. It is 2080 // unclear how we should encode them. This case should be checked earlier 2081 // in predicate methods (isLiteralImm()) 2082 llvm_unreachable("fp literal in 64-bit integer instruction."); 2083 2084 case AMDGPU::OPERAND_REG_IMM_INT32: 2085 case AMDGPU::OPERAND_REG_IMM_FP32: 2086 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2087 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2088 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2090 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2091 case AMDGPU::OPERAND_REG_IMM_INT16: 2092 case AMDGPU::OPERAND_REG_IMM_FP16: 2093 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2094 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2095 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2097 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2102 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2103 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2105 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2106 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2107 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2108 case AMDGPU::OPERAND_KIMM32: 2109 case AMDGPU::OPERAND_KIMM16: { 2110 bool lost; 2111 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2112 // Convert literal to single precision 2113 FPLiteral.convert(*getOpFltSemantics(OpTy), 2114 APFloat::rmNearestTiesToEven, &lost); 2115 // We allow precision lost but not overflow or underflow. This should be 2116 // checked earlier in isLiteralImm() 2117 2118 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2119 Inst.addOperand(MCOperand::createImm(ImmVal)); 2120 setImmKindLiteral(); 2121 return; 2122 } 2123 default: 2124 llvm_unreachable("invalid operand size"); 2125 } 2126 2127 return; 2128 } 2129 2130 // We got int literal token. 2131 // Only sign extend inline immediates. 
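  // For example, with a 32-bit operand -1 is encoded directly as an inline
  // constant below, while 0x12345 is outside the inline range and is emitted
  // as a 32-bit literal.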
2132 switch (OpTy) { 2133 case AMDGPU::OPERAND_REG_IMM_INT32: 2134 case AMDGPU::OPERAND_REG_IMM_FP32: 2135 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2136 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2137 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2138 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2139 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2140 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2141 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2142 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2143 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2144 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2145 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2146 if (isSafeTruncation(Val, 32) && 2147 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2148 AsmParser->hasInv2PiInlineImm())) { 2149 Inst.addOperand(MCOperand::createImm(Val)); 2150 setImmKindConst(); 2151 return; 2152 } 2153 2154 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2155 setImmKindLiteral(); 2156 return; 2157 2158 case AMDGPU::OPERAND_REG_IMM_INT64: 2159 case AMDGPU::OPERAND_REG_IMM_FP64: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2163 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2164 Inst.addOperand(MCOperand::createImm(Val)); 2165 setImmKindConst(); 2166 return; 2167 } 2168 2169 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2170 setImmKindLiteral(); 2171 return; 2172 2173 case AMDGPU::OPERAND_REG_IMM_INT16: 2174 case AMDGPU::OPERAND_REG_IMM_FP16: 2175 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2176 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2177 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2179 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2180 if (isSafeTruncation(Val, 16) && 2181 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2182 AsmParser->hasInv2PiInlineImm())) { 2183 Inst.addOperand(MCOperand::createImm(Val)); 2184 setImmKindConst(); 2185 return; 2186 } 2187 2188 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2189 setImmKindLiteral(); 2190 return; 2191 2192 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2193 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2194 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2195 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2196 assert(isSafeTruncation(Val, 16)); 2197 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2198 AsmParser->hasInv2PiInlineImm())); 2199 2200 Inst.addOperand(MCOperand::createImm(Val)); 2201 return; 2202 } 2203 case AMDGPU::OPERAND_KIMM32: 2204 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2205 setImmKindNone(); 2206 return; 2207 case AMDGPU::OPERAND_KIMM16: 2208 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2209 setImmKindNone(); 2210 return; 2211 default: 2212 llvm_unreachable("invalid operand size"); 2213 } 2214 } 2215 2216 template <unsigned Bitwidth> 2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2218 APInt Literal(64, Imm.Val); 2219 setImmKindNone(); 2220 2221 if (!Imm.IsFPImm) { 2222 // We got int literal token. 
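    // Pass the integer through unchanged, keeping only its low Bitwidth bits.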
2223 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2224 return; 2225 } 2226 2227 bool Lost; 2228 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2229 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2230 APFloat::rmNearestTiesToEven, &Lost); 2231 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2232 } 2233 2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2235 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2236 } 2237 2238 static bool isInlineValue(unsigned Reg) { 2239 switch (Reg) { 2240 case AMDGPU::SRC_SHARED_BASE: 2241 case AMDGPU::SRC_SHARED_LIMIT: 2242 case AMDGPU::SRC_PRIVATE_BASE: 2243 case AMDGPU::SRC_PRIVATE_LIMIT: 2244 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2245 return true; 2246 case AMDGPU::SRC_VCCZ: 2247 case AMDGPU::SRC_EXECZ: 2248 case AMDGPU::SRC_SCC: 2249 return true; 2250 case AMDGPU::SGPR_NULL: 2251 return true; 2252 default: 2253 return false; 2254 } 2255 } 2256 2257 bool AMDGPUOperand::isInlineValue() const { 2258 return isRegKind() && ::isInlineValue(getReg()); 2259 } 2260 2261 //===----------------------------------------------------------------------===// 2262 // AsmParser 2263 //===----------------------------------------------------------------------===// 2264 2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2266 if (Is == IS_VGPR) { 2267 switch (RegWidth) { 2268 default: return -1; 2269 case 32: 2270 return AMDGPU::VGPR_32RegClassID; 2271 case 64: 2272 return AMDGPU::VReg_64RegClassID; 2273 case 96: 2274 return AMDGPU::VReg_96RegClassID; 2275 case 128: 2276 return AMDGPU::VReg_128RegClassID; 2277 case 160: 2278 return AMDGPU::VReg_160RegClassID; 2279 case 192: 2280 return AMDGPU::VReg_192RegClassID; 2281 case 224: 2282 return AMDGPU::VReg_224RegClassID; 2283 case 256: 2284 return AMDGPU::VReg_256RegClassID; 2285 case 512: 2286 return AMDGPU::VReg_512RegClassID; 2287 case 1024: 2288 return AMDGPU::VReg_1024RegClassID; 2289 } 2290 } else if (Is == IS_TTMP) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::TTMP_32RegClassID; 2295 case 64: 2296 return AMDGPU::TTMP_64RegClassID; 2297 case 128: 2298 return AMDGPU::TTMP_128RegClassID; 2299 case 256: 2300 return AMDGPU::TTMP_256RegClassID; 2301 case 512: 2302 return AMDGPU::TTMP_512RegClassID; 2303 } 2304 } else if (Is == IS_SGPR) { 2305 switch (RegWidth) { 2306 default: return -1; 2307 case 32: 2308 return AMDGPU::SGPR_32RegClassID; 2309 case 64: 2310 return AMDGPU::SGPR_64RegClassID; 2311 case 96: 2312 return AMDGPU::SGPR_96RegClassID; 2313 case 128: 2314 return AMDGPU::SGPR_128RegClassID; 2315 case 160: 2316 return AMDGPU::SGPR_160RegClassID; 2317 case 192: 2318 return AMDGPU::SGPR_192RegClassID; 2319 case 224: 2320 return AMDGPU::SGPR_224RegClassID; 2321 case 256: 2322 return AMDGPU::SGPR_256RegClassID; 2323 case 512: 2324 return AMDGPU::SGPR_512RegClassID; 2325 } 2326 } else if (Is == IS_AGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::AGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::AReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::AReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::AReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::AReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::AReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::AReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::AReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::AReg_512RegClassID; 
2347 case 1024: 2348 return AMDGPU::AReg_1024RegClassID; 2349 } 2350 } 2351 return -1; 2352 } 2353 2354 static unsigned getSpecialRegForName(StringRef RegName) { 2355 return StringSwitch<unsigned>(RegName) 2356 .Case("exec", AMDGPU::EXEC) 2357 .Case("vcc", AMDGPU::VCC) 2358 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2359 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2360 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2361 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2362 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2363 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2364 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2365 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2366 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2367 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2368 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2369 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2370 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2371 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2372 .Case("m0", AMDGPU::M0) 2373 .Case("vccz", AMDGPU::SRC_VCCZ) 2374 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2375 .Case("execz", AMDGPU::SRC_EXECZ) 2376 .Case("src_execz", AMDGPU::SRC_EXECZ) 2377 .Case("scc", AMDGPU::SRC_SCC) 2378 .Case("src_scc", AMDGPU::SRC_SCC) 2379 .Case("tba", AMDGPU::TBA) 2380 .Case("tma", AMDGPU::TMA) 2381 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2382 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2383 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2384 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2385 .Case("vcc_lo", AMDGPU::VCC_LO) 2386 .Case("vcc_hi", AMDGPU::VCC_HI) 2387 .Case("exec_lo", AMDGPU::EXEC_LO) 2388 .Case("exec_hi", AMDGPU::EXEC_HI) 2389 .Case("tma_lo", AMDGPU::TMA_LO) 2390 .Case("tma_hi", AMDGPU::TMA_HI) 2391 .Case("tba_lo", AMDGPU::TBA_LO) 2392 .Case("tba_hi", AMDGPU::TBA_HI) 2393 .Case("pc", AMDGPU::PC_REG) 2394 .Case("null", AMDGPU::SGPR_NULL) 2395 .Default(AMDGPU::NoRegister); 2396 } 2397 2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2399 SMLoc &EndLoc, bool RestoreOnFailure) { 2400 auto R = parseRegister(); 2401 if (!R) return true; 2402 assert(R->isReg()); 2403 RegNo = R->getReg(); 2404 StartLoc = R->getStartLoc(); 2405 EndLoc = R->getEndLoc(); 2406 return false; 2407 } 2408 2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2410 SMLoc &EndLoc) { 2411 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2412 } 2413 2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2415 SMLoc &StartLoc, 2416 SMLoc &EndLoc) { 2417 bool Result = 2418 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2419 bool PendingErrors = getParser().hasPendingError(); 2420 getParser().clearPendingErrors(); 2421 if (PendingErrors) 2422 return MatchOperand_ParseFail; 2423 if (Result) 2424 return MatchOperand_NoMatch; 2425 return MatchOperand_Success; 2426 } 2427 2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2429 RegisterKind RegKind, unsigned Reg1, 2430 SMLoc Loc) { 2431 switch (RegKind) { 2432 case IS_SPECIAL: 2433 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2434 Reg = AMDGPU::EXEC; 2435 RegWidth = 64; 2436 return true; 2437 } 2438 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2439 Reg = AMDGPU::FLAT_SCR; 2440 RegWidth = 64; 2441 return true; 2442 } 2443 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2444 Reg = AMDGPU::XNACK_MASK; 2445 RegWidth = 64; 
2446 return true; 2447 } 2448 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2449 Reg = AMDGPU::VCC; 2450 RegWidth = 64; 2451 return true; 2452 } 2453 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2454 Reg = AMDGPU::TBA; 2455 RegWidth = 64; 2456 return true; 2457 } 2458 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2459 Reg = AMDGPU::TMA; 2460 RegWidth = 64; 2461 return true; 2462 } 2463 Error(Loc, "register does not fit in the list"); 2464 return false; 2465 case IS_VGPR: 2466 case IS_SGPR: 2467 case IS_AGPR: 2468 case IS_TTMP: 2469 if (Reg1 != Reg + RegWidth / 32) { 2470 Error(Loc, "registers in a list must have consecutive indices"); 2471 return false; 2472 } 2473 RegWidth += 32; 2474 return true; 2475 default: 2476 llvm_unreachable("unexpected register kind"); 2477 } 2478 } 2479 2480 struct RegInfo { 2481 StringLiteral Name; 2482 RegisterKind Kind; 2483 }; 2484 2485 static constexpr RegInfo RegularRegisters[] = { 2486 {{"v"}, IS_VGPR}, 2487 {{"s"}, IS_SGPR}, 2488 {{"ttmp"}, IS_TTMP}, 2489 {{"acc"}, IS_AGPR}, 2490 {{"a"}, IS_AGPR}, 2491 }; 2492 2493 static bool isRegularReg(RegisterKind Kind) { 2494 return Kind == IS_VGPR || 2495 Kind == IS_SGPR || 2496 Kind == IS_TTMP || 2497 Kind == IS_AGPR; 2498 } 2499 2500 static const RegInfo* getRegularRegInfo(StringRef Str) { 2501 for (const RegInfo &Reg : RegularRegisters) 2502 if (Str.startswith(Reg.Name)) 2503 return &Reg; 2504 return nullptr; 2505 } 2506 2507 static bool getRegNum(StringRef Str, unsigned& Num) { 2508 return !Str.getAsInteger(10, Num); 2509 } 2510 2511 bool 2512 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2513 const AsmToken &NextToken) const { 2514 2515 // A list of consecutive registers: [s0,s1,s2,s3] 2516 if (Token.is(AsmToken::LBrac)) 2517 return true; 2518 2519 if (!Token.is(AsmToken::Identifier)) 2520 return false; 2521 2522 // A single register like s0 or a range of registers like s[0:1] 2523 2524 StringRef Str = Token.getString(); 2525 const RegInfo *Reg = getRegularRegInfo(Str); 2526 if (Reg) { 2527 StringRef RegName = Reg->Name; 2528 StringRef RegSuffix = Str.substr(RegName.size()); 2529 if (!RegSuffix.empty()) { 2530 unsigned Num; 2531 // A single register with an index: rXX 2532 if (getRegNum(RegSuffix, Num)) 2533 return true; 2534 } else { 2535 // A range of registers: r[XX:YY]. 2536 if (NextToken.is(AsmToken::LBrac)) 2537 return true; 2538 } 2539 } 2540 2541 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2542 } 2543 2544 bool 2545 AMDGPUAsmParser::isRegister() 2546 { 2547 return isRegister(getToken(), peekToken()); 2548 } 2549 2550 unsigned 2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2552 unsigned RegNum, 2553 unsigned RegWidth, 2554 SMLoc Loc) { 2555 2556 assert(isRegularReg(RegKind)); 2557 2558 unsigned AlignSize = 1; 2559 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2560 // SGPR and TTMP registers must be aligned. 2561 // Max required alignment is 4 dwords. 
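    // E.g. s[2:3] (a 2-dword range starting at an even index) is accepted,
    // while s[1:2] is rejected below with "invalid register alignment".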
2562 AlignSize = std::min(RegWidth / 32, 4u); 2563 } 2564 2565 if (RegNum % AlignSize != 0) { 2566 Error(Loc, "invalid register alignment"); 2567 return AMDGPU::NoRegister; 2568 } 2569 2570 unsigned RegIdx = RegNum / AlignSize; 2571 int RCID = getRegClass(RegKind, RegWidth); 2572 if (RCID == -1) { 2573 Error(Loc, "invalid or unsupported register size"); 2574 return AMDGPU::NoRegister; 2575 } 2576 2577 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2578 const MCRegisterClass RC = TRI->getRegClass(RCID); 2579 if (RegIdx >= RC.getNumRegs()) { 2580 Error(Loc, "register index is out of range"); 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 return RC.getRegister(RegIdx); 2585 } 2586 2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2588 int64_t RegLo, RegHi; 2589 if (!skipToken(AsmToken::LBrac, "missing register index")) 2590 return false; 2591 2592 SMLoc FirstIdxLoc = getLoc(); 2593 SMLoc SecondIdxLoc; 2594 2595 if (!parseExpr(RegLo)) 2596 return false; 2597 2598 if (trySkipToken(AsmToken::Colon)) { 2599 SecondIdxLoc = getLoc(); 2600 if (!parseExpr(RegHi)) 2601 return false; 2602 } else { 2603 RegHi = RegLo; 2604 } 2605 2606 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2607 return false; 2608 2609 if (!isUInt<32>(RegLo)) { 2610 Error(FirstIdxLoc, "invalid register index"); 2611 return false; 2612 } 2613 2614 if (!isUInt<32>(RegHi)) { 2615 Error(SecondIdxLoc, "invalid register index"); 2616 return false; 2617 } 2618 2619 if (RegLo > RegHi) { 2620 Error(FirstIdxLoc, "first register index should not exceed second index"); 2621 return false; 2622 } 2623 2624 Num = static_cast<unsigned>(RegLo); 2625 RegWidth = 32 * ((RegHi - RegLo) + 1); 2626 return true; 2627 } 2628 2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2630 unsigned &RegNum, unsigned &RegWidth, 2631 SmallVectorImpl<AsmToken> &Tokens) { 2632 assert(isToken(AsmToken::Identifier)); 2633 unsigned Reg = getSpecialRegForName(getTokenStr()); 2634 if (Reg) { 2635 RegNum = 0; 2636 RegWidth = 32; 2637 RegKind = IS_SPECIAL; 2638 Tokens.push_back(getToken()); 2639 lex(); // skip register name 2640 } 2641 return Reg; 2642 } 2643 2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2645 unsigned &RegNum, unsigned &RegWidth, 2646 SmallVectorImpl<AsmToken> &Tokens) { 2647 assert(isToken(AsmToken::Identifier)); 2648 StringRef RegName = getTokenStr(); 2649 auto Loc = getLoc(); 2650 2651 const RegInfo *RI = getRegularRegInfo(RegName); 2652 if (!RI) { 2653 Error(Loc, "invalid register name"); 2654 return AMDGPU::NoRegister; 2655 } 2656 2657 Tokens.push_back(getToken()); 2658 lex(); // skip register name 2659 2660 RegKind = RI->Kind; 2661 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2662 if (!RegSuffix.empty()) { 2663 // Single 32-bit register: vXX. 2664 if (!getRegNum(RegSuffix, RegNum)) { 2665 Error(Loc, "invalid register index"); 2666 return AMDGPU::NoRegister; 2667 } 2668 RegWidth = 32; 2669 } else { 2670 // Range of registers: v[XX:YY]. ":YY" is optional. 
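    // E.g. "v[4:7]" yields RegNum = 4 and RegWidth = 128,
    // while "v[4]" yields RegNum = 4 and RegWidth = 32.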
2671 if (!ParseRegRange(RegNum, RegWidth)) 2672 return AMDGPU::NoRegister; 2673 } 2674 2675 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2676 } 2677 2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2679 unsigned &RegWidth, 2680 SmallVectorImpl<AsmToken> &Tokens) { 2681 unsigned Reg = AMDGPU::NoRegister; 2682 auto ListLoc = getLoc(); 2683 2684 if (!skipToken(AsmToken::LBrac, 2685 "expected a register or a list of registers")) { 2686 return AMDGPU::NoRegister; 2687 } 2688 2689 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2690 2691 auto Loc = getLoc(); 2692 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2693 return AMDGPU::NoRegister; 2694 if (RegWidth != 32) { 2695 Error(Loc, "expected a single 32-bit register"); 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 for (; trySkipToken(AsmToken::Comma); ) { 2700 RegisterKind NextRegKind; 2701 unsigned NextReg, NextRegNum, NextRegWidth; 2702 Loc = getLoc(); 2703 2704 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2705 NextRegNum, NextRegWidth, 2706 Tokens)) { 2707 return AMDGPU::NoRegister; 2708 } 2709 if (NextRegWidth != 32) { 2710 Error(Loc, "expected a single 32-bit register"); 2711 return AMDGPU::NoRegister; 2712 } 2713 if (NextRegKind != RegKind) { 2714 Error(Loc, "registers in a list must be of the same kind"); 2715 return AMDGPU::NoRegister; 2716 } 2717 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2718 return AMDGPU::NoRegister; 2719 } 2720 2721 if (!skipToken(AsmToken::RBrac, 2722 "expected a comma or a closing square bracket")) { 2723 return AMDGPU::NoRegister; 2724 } 2725 2726 if (isRegularReg(RegKind)) 2727 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2728 2729 return Reg; 2730 } 2731 2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2733 unsigned &RegNum, unsigned &RegWidth, 2734 SmallVectorImpl<AsmToken> &Tokens) { 2735 auto Loc = getLoc(); 2736 Reg = AMDGPU::NoRegister; 2737 2738 if (isToken(AsmToken::Identifier)) { 2739 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2740 if (Reg == AMDGPU::NoRegister) 2741 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2742 } else { 2743 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2744 } 2745 2746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2747 if (Reg == AMDGPU::NoRegister) { 2748 assert(Parser.hasPendingError()); 2749 return false; 2750 } 2751 2752 if (!subtargetHasRegister(*TRI, Reg)) { 2753 if (Reg == AMDGPU::SGPR_NULL) { 2754 Error(Loc, "'null' operand is not supported on this GPU"); 2755 } else { 2756 Error(Loc, "register not available on this GPU"); 2757 } 2758 return false; 2759 } 2760 2761 return true; 2762 } 2763 2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2765 unsigned &RegNum, unsigned &RegWidth, 2766 bool RestoreOnFailure /*=false*/) { 2767 Reg = AMDGPU::NoRegister; 2768 2769 SmallVector<AsmToken, 1> Tokens; 2770 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2771 if (RestoreOnFailure) { 2772 while (!Tokens.empty()) { 2773 getLexer().UnLex(Tokens.pop_back_val()); 2774 } 2775 } 2776 return true; 2777 } 2778 return false; 2779 } 2780 2781 Optional<StringRef> 2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2783 switch (RegKind) { 2784 case IS_VGPR: 2785 return StringRef(".amdgcn.next_free_vgpr"); 2786 case IS_SGPR: 2787 return StringRef(".amdgcn.next_free_sgpr"); 2788 default: 2789 return None; 2790 } 2791 } 2792 2793 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2794 auto SymbolName = getGprCountSymbolName(RegKind); 2795 assert(SymbolName && "initializing invalid register kind"); 2796 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2797 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2798 } 2799 2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2801 unsigned DwordRegIndex, 2802 unsigned RegWidth) { 2803 // Symbols are only defined for GCN targets 2804 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2805 return true; 2806 2807 auto SymbolName = getGprCountSymbolName(RegKind); 2808 if (!SymbolName) 2809 return true; 2810 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2811 2812 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2813 int64_t OldCount; 2814 2815 if (!Sym->isVariable()) 2816 return !Error(getLoc(), 2817 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2818 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2819 return !Error( 2820 getLoc(), 2821 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2822 2823 if (OldCount <= NewMax) 2824 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2825 2826 return true; 2827 } 2828 2829 std::unique_ptr<AMDGPUOperand> 2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2831 const auto &Tok = getToken(); 2832 SMLoc StartLoc = Tok.getLoc(); 2833 SMLoc EndLoc = Tok.getEndLoc(); 2834 RegisterKind RegKind; 2835 unsigned Reg, RegNum, RegWidth; 2836 2837 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2838 return nullptr; 2839 } 2840 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2841 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2842 return nullptr; 2843 } else 2844 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2845 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2846 } 2847 2848 OperandMatchResultTy 2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2850 // TODO: add syntactic sugar for 1/(2*PI) 2851 2852 assert(!isRegister()); 2853 assert(!isModifier()); 2854 2855 const auto& Tok = getToken(); 2856 const auto& NextTok = peekToken(); 2857 bool IsReal = Tok.is(AsmToken::Real); 2858 SMLoc S = getLoc(); 2859 bool Negate = false; 2860 2861 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2862 lex(); 2863 IsReal = true; 2864 Negate = true; 2865 } 2866 2867 if (IsReal) { 2868 // Floating-point expressions are not supported. 2869 // Can only allow floating-point literals with an 2870 // optional sign. 2871 2872 StringRef Num = getTokenStr(); 2873 lex(); 2874 2875 APFloat RealVal(APFloat::IEEEdouble()); 2876 auto roundMode = APFloat::rmNearestTiesToEven; 2877 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2878 return MatchOperand_ParseFail; 2879 } 2880 if (Negate) 2881 RealVal.changeSign(); 2882 2883 Operands.push_back( 2884 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2885 AMDGPUOperand::ImmTyNone, true)); 2886 2887 return MatchOperand_Success; 2888 2889 } else { 2890 int64_t IntVal; 2891 const MCExpr *Expr; 2892 SMLoc S = getLoc(); 2893 2894 if (HasSP3AbsModifier) { 2895 // This is a workaround for handling expressions 2896 // as arguments of SP3 'abs' modifier, for example: 2897 // |1.0| 2898 // |-1| 2899 // |1+x| 2900 // This syntax is not compatible with syntax of standard 2901 // MC expressions (due to the trailing '|'). 
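      // Parse only a primary expression here so that the trailing '|' is left
      // for the caller to consume as the closing bar of the SP3 'abs' modifier.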
2902 SMLoc EndLoc; 2903 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2904 return MatchOperand_ParseFail; 2905 } else { 2906 if (Parser.parseExpression(Expr)) 2907 return MatchOperand_ParseFail; 2908 } 2909 2910 if (Expr->evaluateAsAbsolute(IntVal)) { 2911 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2912 } else { 2913 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2914 } 2915 2916 return MatchOperand_Success; 2917 } 2918 2919 return MatchOperand_NoMatch; 2920 } 2921 2922 OperandMatchResultTy 2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2924 if (!isRegister()) 2925 return MatchOperand_NoMatch; 2926 2927 if (auto R = parseRegister()) { 2928 assert(R->isReg()); 2929 Operands.push_back(std::move(R)); 2930 return MatchOperand_Success; 2931 } 2932 return MatchOperand_ParseFail; 2933 } 2934 2935 OperandMatchResultTy 2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2937 auto res = parseReg(Operands); 2938 if (res != MatchOperand_NoMatch) { 2939 return res; 2940 } else if (isModifier()) { 2941 return MatchOperand_NoMatch; 2942 } else { 2943 return parseImm(Operands, HasSP3AbsMod); 2944 } 2945 } 2946 2947 bool 2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2949 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2950 const auto &str = Token.getString(); 2951 return str == "abs" || str == "neg" || str == "sext"; 2952 } 2953 return false; 2954 } 2955 2956 bool 2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2958 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2959 } 2960 2961 bool 2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2963 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2964 } 2965 2966 bool 2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2968 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2969 } 2970 2971 // Check if this is an operand modifier or an opcode modifier 2972 // which may look like an expression but it is not. We should 2973 // avoid parsing these modifiers as expressions. Currently 2974 // recognized sequences are: 2975 // |...| 2976 // abs(...) 2977 // neg(...) 2978 // sext(...) 2979 // -reg 2980 // -|...| 2981 // -abs(...) 2982 // name:... 2983 // Note that simple opcode modifiers like 'gds' may be parsed as 2984 // expressions; this is a special case. See getExpressionAsToken. 2985 // 2986 bool 2987 AMDGPUAsmParser::isModifier() { 2988 2989 AsmToken Tok = getToken(); 2990 AsmToken NextToken[2]; 2991 peekTokens(NextToken); 2992 2993 return isOperandModifier(Tok, NextToken[0]) || 2994 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2995 isOpcodeModifierWithVal(Tok, NextToken[0]); 2996 } 2997 2998 // Check if the current token is an SP3 'neg' modifier. 2999 // Currently this modifier is allowed in the following context: 3000 // 3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3002 // 2. Before an 'abs' modifier: -abs(...) 3003 // 3. Before an SP3 'abs' modifier: -|...| 3004 // 3005 // In all other cases "-" is handled as a part 3006 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of the floating-point
// NEG modifier would have resulted in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//   v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//   v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs) ?
MatchOperand_ParseFail : Res; 3080 } 3081 3082 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3083 return MatchOperand_ParseFail; 3084 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3085 return MatchOperand_ParseFail; 3086 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3087 return MatchOperand_ParseFail; 3088 3089 AMDGPUOperand::Modifiers Mods; 3090 Mods.Abs = Abs || SP3Abs; 3091 Mods.Neg = Neg || SP3Neg; 3092 3093 if (Mods.hasFPModifiers()) { 3094 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3095 if (Op.isExpr()) { 3096 Error(Op.getStartLoc(), "expected an absolute expression"); 3097 return MatchOperand_ParseFail; 3098 } 3099 Op.setModifiers(Mods); 3100 } 3101 return MatchOperand_Success; 3102 } 3103 3104 OperandMatchResultTy 3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3106 bool AllowImm) { 3107 bool Sext = trySkipId("sext"); 3108 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3109 return MatchOperand_ParseFail; 3110 3111 OperandMatchResultTy Res; 3112 if (AllowImm) { 3113 Res = parseRegOrImm(Operands); 3114 } else { 3115 Res = parseReg(Operands); 3116 } 3117 if (Res != MatchOperand_Success) { 3118 return Sext? MatchOperand_ParseFail : Res; 3119 } 3120 3121 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3122 return MatchOperand_ParseFail; 3123 3124 AMDGPUOperand::Modifiers Mods; 3125 Mods.Sext = Sext; 3126 3127 if (Mods.hasIntModifiers()) { 3128 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3129 if (Op.isExpr()) { 3130 Error(Op.getStartLoc(), "expected an absolute expression"); 3131 return MatchOperand_ParseFail; 3132 } 3133 Op.setModifiers(Mods); 3134 } 3135 3136 return MatchOperand_Success; 3137 } 3138 3139 OperandMatchResultTy 3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3141 return parseRegOrImmWithFPInputMods(Operands, false); 3142 } 3143 3144 OperandMatchResultTy 3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3146 return parseRegOrImmWithIntInputMods(Operands, false); 3147 } 3148 3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3150 auto Loc = getLoc(); 3151 if (trySkipId("off")) { 3152 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3153 AMDGPUOperand::ImmTyOff, false)); 3154 return MatchOperand_Success; 3155 } 3156 3157 if (!isRegister()) 3158 return MatchOperand_NoMatch; 3159 3160 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3161 if (Reg) { 3162 Operands.push_back(std::move(Reg)); 3163 return MatchOperand_Success; 3164 } 3165 3166 return MatchOperand_ParseFail; 3167 3168 } 3169 3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3171 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3172 3173 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3174 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3175 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3176 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3177 return Match_InvalidOperand; 3178 3179 if ((TSFlags & SIInstrFlags::VOP3) && 3180 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3181 getForcedEncodingSize() != 64) 3182 return Match_PreferE32; 3183 3184 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3185 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3186 // v_mac_f32/16 allow only dst_sel == DWORD; 3187 auto OpNum = 3188 
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// Which asm variants we should check.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases in which a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3273 unsigned OpIdx) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 3276 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3277 return false; 3278 } 3279 3280 const MCOperand &MO = Inst.getOperand(OpIdx); 3281 3282 int64_t Val = MO.getImm(); 3283 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3284 3285 switch (OpSize) { // expected operand size 3286 case 8: 3287 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3288 case 4: 3289 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3290 case 2: { 3291 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3292 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3293 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3294 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3295 return AMDGPU::isInlinableIntLiteral(Val); 3296 3297 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3298 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3299 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3300 return AMDGPU::isInlinableIntLiteralV216(Val); 3301 3302 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3303 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3304 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3305 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3306 3307 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3308 } 3309 default: 3310 llvm_unreachable("invalid operand size"); 3311 } 3312 } 3313 3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3315 if (!isGFX10Plus()) 3316 return 1; 3317 3318 switch (Opcode) { 3319 // 64-bit shift instructions can use only one scalar value input 3320 case AMDGPU::V_LSHLREV_B64_e64: 3321 case AMDGPU::V_LSHLREV_B64_gfx10: 3322 case AMDGPU::V_LSHRREV_B64_e64: 3323 case AMDGPU::V_LSHRREV_B64_gfx10: 3324 case AMDGPU::V_ASHRREV_I64_e64: 3325 case AMDGPU::V_ASHRREV_I64_gfx10: 3326 case AMDGPU::V_LSHL_B64_e64: 3327 case AMDGPU::V_LSHR_B64_e64: 3328 case AMDGPU::V_ASHR_I64_e64: 3329 return 1; 3330 default: 3331 return 2; 3332 } 3333 } 3334 3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3336 const MCOperand &MO = Inst.getOperand(OpIdx); 3337 if (MO.isImm()) { 3338 return !isInlineConstant(Inst, OpIdx); 3339 } else if (MO.isReg()) { 3340 auto Reg = MO.getReg(); 3341 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3342 auto PReg = mc2PseudoReg(Reg); 3343 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3344 } else { 3345 return true; 3346 } 3347 } 3348 3349 bool 3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3351 const OperandVector &Operands) { 3352 const unsigned Opcode = Inst.getOpcode(); 3353 const MCInstrDesc &Desc = MII.get(Opcode); 3354 unsigned LastSGPR = AMDGPU::NoRegister; 3355 unsigned ConstantBusUseCount = 0; 3356 unsigned NumLiterals = 0; 3357 unsigned LiteralSize; 3358 3359 if (Desc.TSFlags & 3360 (SIInstrFlags::VOPC | 3361 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3362 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3363 SIInstrFlags::SDWA)) { 3364 // Check special imm operands (used by madmk, etc) 3365 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3366 ++NumLiterals; 3367 LiteralSize = 4; 3368 } 3369 3370 SmallDenseSet<unsigned> SGPRsUsed; 3371 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3372 if (SGPRUsed != AMDGPU::NoRegister) { 3373 SGPRsUsed.insert(SGPRUsed); 3374 ++ConstantBusUseCount; 3375 } 3376 3377 
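    // Now walk the explicit src operands: each distinct SGPR and each literal
    // value found below adds to ConstantBusUseCount.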
    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3435 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3436 return false; 3437 } 3438 3439 bool 3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3441 const OperandVector &Operands) { 3442 const unsigned Opcode = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opcode); 3444 3445 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3446 if (DstIdx == -1 || 3447 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3448 return true; 3449 } 3450 3451 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3452 3453 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3454 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3455 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3456 3457 assert(DstIdx != -1); 3458 const MCOperand &Dst = Inst.getOperand(DstIdx); 3459 assert(Dst.isReg()); 3460 3461 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3462 3463 for (int SrcIdx : SrcIndices) { 3464 if (SrcIdx == -1) break; 3465 const MCOperand &Src = Inst.getOperand(SrcIdx); 3466 if (Src.isReg()) { 3467 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3468 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3469 Error(getRegLoc(SrcReg, Operands), 3470 "destination must be different than all sources"); 3471 return false; 3472 } 3473 } 3474 } 3475 3476 return true; 3477 } 3478 3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3480 3481 const unsigned Opc = Inst.getOpcode(); 3482 const MCInstrDesc &Desc = MII.get(Opc); 3483 3484 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3485 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3486 assert(ClampIdx != -1); 3487 return Inst.getOperand(ClampIdx).getImm() == 0; 3488 } 3489 3490 return true; 3491 } 3492 3493 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3494 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3499 return None; 3500 3501 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3502 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3503 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3504 3505 assert(VDataIdx != -1); 3506 3507 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3508 return None; 3509 3510 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3511 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3512 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3513 if (DMask == 0) 3514 DMask = 1; 3515 3516 bool isPackedD16 = false; 3517 unsigned DataSize = 3518 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3519 if (hasPackedD16()) { 3520 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3521 isPackedD16 = D16Idx >= 0; 3522 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3523 DataSize = (DataSize + 1) / 2; 3524 } 3525 3526 if ((VDataSize / 4) == DataSize + TFESize) 3527 return None; 3528 3529 return StringRef(isPackedD16 3530 ? 
"image data size does not match dmask, d16 and tfe" 3531 : "image data size does not match dmask and tfe"); 3532 } 3533 3534 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3535 const unsigned Opc = Inst.getOpcode(); 3536 const MCInstrDesc &Desc = MII.get(Opc); 3537 3538 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3539 return true; 3540 3541 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3542 3543 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3544 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3545 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3546 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3547 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3548 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3549 3550 assert(VAddr0Idx != -1); 3551 assert(SrsrcIdx != -1); 3552 assert(SrsrcIdx > VAddr0Idx); 3553 3554 if (DimIdx == -1) 3555 return true; // intersect_ray 3556 3557 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3558 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3559 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3560 unsigned ActualAddrSize = 3561 IsNSA ? SrsrcIdx - VAddr0Idx 3562 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3563 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3564 3565 unsigned ExpectedAddrSize = 3566 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3567 3568 if (!IsNSA) { 3569 if (ExpectedAddrSize > 8) 3570 ExpectedAddrSize = 16; 3571 3572 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3573 // This provides backward compatibility for assembly created 3574 // before 160b/192b/224b types were directly supported. 3575 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3576 return true; 3577 } 3578 3579 return ActualAddrSize == ExpectedAddrSize; 3580 } 3581 3582 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3583 3584 const unsigned Opc = Inst.getOpcode(); 3585 const MCInstrDesc &Desc = MII.get(Opc); 3586 3587 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3588 return true; 3589 if (!Desc.mayLoad() || !Desc.mayStore()) 3590 return true; // Not atomic 3591 3592 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3593 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3594 3595 // This is an incomplete check because image_atomic_cmpswap 3596 // may only use 0x3 and 0xf while other atomic operations 3597 // may use 0x1 and 0x3. However these limitations are 3598 // verified when we check that dmask matches dst size. 3599 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3600 } 3601 3602 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3603 3604 const unsigned Opc = Inst.getOpcode(); 3605 const MCInstrDesc &Desc = MII.get(Opc); 3606 3607 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3608 return true; 3609 3610 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3611 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3612 3613 // GATHER4 instructions use dmask in a different fashion compared to 3614 // other MIMG instructions. The only useful DMASK values are 3615 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3616 // (red,red,red,red) etc.) The ISA document doesn't mention 3617 // this. 
3618 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3619 } 3620 3621 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3626 return true; 3627 3628 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3629 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3630 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3631 3632 if (!BaseOpcode->MSAA) 3633 return true; 3634 3635 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3636 assert(DimIdx != -1); 3637 3638 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3639 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3640 3641 return DimInfo->MSAA; 3642 } 3643 3644 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3645 { 3646 switch (Opcode) { 3647 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3648 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3649 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3650 return true; 3651 default: 3652 return false; 3653 } 3654 } 3655 3656 // movrels* opcodes should only allow VGPRS as src0. 3657 // This is specified in .td description for vop1/vop3, 3658 // but sdwa is handled differently. See isSDWAOperand. 3659 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3660 const OperandVector &Operands) { 3661 3662 const unsigned Opc = Inst.getOpcode(); 3663 const MCInstrDesc &Desc = MII.get(Opc); 3664 3665 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3666 return true; 3667 3668 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3669 assert(Src0Idx != -1); 3670 3671 SMLoc ErrLoc; 3672 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3673 if (Src0.isReg()) { 3674 auto Reg = mc2PseudoReg(Src0.getReg()); 3675 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3676 if (!isSGPR(Reg, TRI)) 3677 return true; 3678 ErrLoc = getRegLoc(Reg, Operands); 3679 } else { 3680 ErrLoc = getConstLoc(Operands); 3681 } 3682 3683 Error(ErrLoc, "source operand must be a VGPR"); 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3688 const OperandVector &Operands) { 3689 3690 const unsigned Opc = Inst.getOpcode(); 3691 3692 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3693 return true; 3694 3695 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3696 assert(Src0Idx != -1); 3697 3698 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3699 if (!Src0.isReg()) 3700 return true; 3701 3702 auto Reg = mc2PseudoReg(Src0.getReg()); 3703 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3704 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3705 Error(getRegLoc(Reg, Operands), 3706 "source operand must be either a VGPR or an inline constant"); 3707 return false; 3708 } 3709 3710 return true; 3711 } 3712 3713 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3714 const OperandVector &Operands) { 3715 const unsigned Opc = Inst.getOpcode(); 3716 const MCInstrDesc &Desc = MII.get(Opc); 3717 3718 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3719 return true; 3720 3721 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3722 if (Src2Idx == -1) 3723 return true; 3724 3725 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3726 if (!Src2.isReg()) 3727 return true; 3728 3729 MCRegister Src2Reg = Src2.getReg(); 3730 MCRegister DstReg = Inst.getOperand(0).getReg(); 3731 if (Src2Reg == DstReg) 3732 return 
true; 3733 3734 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3735 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3736 return true; 3737 3738 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3739 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3740 "source 2 operand must not partially overlap with dst"); 3741 return false; 3742 } 3743 3744 return true; 3745 } 3746 3747 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3748 switch (Inst.getOpcode()) { 3749 default: 3750 return true; 3751 case V_DIV_SCALE_F32_gfx6_gfx7: 3752 case V_DIV_SCALE_F32_vi: 3753 case V_DIV_SCALE_F32_gfx10: 3754 case V_DIV_SCALE_F64_gfx6_gfx7: 3755 case V_DIV_SCALE_F64_vi: 3756 case V_DIV_SCALE_F64_gfx10: 3757 break; 3758 } 3759 3760 // TODO: Check that src0 = src1 or src2. 3761 3762 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3763 AMDGPU::OpName::src1_modifiers, 3764 AMDGPU::OpName::src2_modifiers}) { 3765 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3766 .getImm() & 3767 SISrcMods::ABS) { 3768 return false; 3769 } 3770 } 3771 3772 return true; 3773 } 3774 3775 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3776 3777 const unsigned Opc = Inst.getOpcode(); 3778 const MCInstrDesc &Desc = MII.get(Opc); 3779 3780 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3781 return true; 3782 3783 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3784 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3785 if (isCI() || isSI()) 3786 return false; 3787 } 3788 3789 return true; 3790 } 3791 3792 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3793 const unsigned Opc = Inst.getOpcode(); 3794 const MCInstrDesc &Desc = MII.get(Opc); 3795 3796 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3797 return true; 3798 3799 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3800 if (DimIdx < 0) 3801 return true; 3802 3803 long Imm = Inst.getOperand(DimIdx).getImm(); 3804 if (Imm < 0 || Imm >= 8) 3805 return false; 3806 3807 return true; 3808 } 3809 3810 static bool IsRevOpcode(const unsigned Opcode) 3811 { 3812 switch (Opcode) { 3813 case AMDGPU::V_SUBREV_F32_e32: 3814 case AMDGPU::V_SUBREV_F32_e64: 3815 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3816 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3817 case AMDGPU::V_SUBREV_F32_e32_vi: 3818 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3819 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3820 case AMDGPU::V_SUBREV_F32_e64_vi: 3821 3822 case AMDGPU::V_SUBREV_CO_U32_e32: 3823 case AMDGPU::V_SUBREV_CO_U32_e64: 3824 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3825 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3826 3827 case AMDGPU::V_SUBBREV_U32_e32: 3828 case AMDGPU::V_SUBBREV_U32_e64: 3829 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3830 case AMDGPU::V_SUBBREV_U32_e32_vi: 3831 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3832 case AMDGPU::V_SUBBREV_U32_e64_vi: 3833 3834 case AMDGPU::V_SUBREV_U32_e32: 3835 case AMDGPU::V_SUBREV_U32_e64: 3836 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3837 case AMDGPU::V_SUBREV_U32_e32_vi: 3838 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3839 case AMDGPU::V_SUBREV_U32_e64_vi: 3840 3841 case AMDGPU::V_SUBREV_F16_e32: 3842 case AMDGPU::V_SUBREV_F16_e64: 3843 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3844 case AMDGPU::V_SUBREV_F16_e32_vi: 3845 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3846 case AMDGPU::V_SUBREV_F16_e64_vi: 3847 3848 case AMDGPU::V_SUBREV_U16_e32: 3849 case AMDGPU::V_SUBREV_U16_e64: 3850 case AMDGPU::V_SUBREV_U16_e32_vi: 3851 case AMDGPU::V_SUBREV_U16_e64_vi: 3852 3853 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3854 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3855 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3856 3857 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3858 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3859 3860 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3861 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3862 3863 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3864 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3865 3866 case AMDGPU::V_LSHRREV_B32_e32: 3867 case AMDGPU::V_LSHRREV_B32_e64: 3868 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3869 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3870 case AMDGPU::V_LSHRREV_B32_e32_vi: 3871 case AMDGPU::V_LSHRREV_B32_e64_vi: 3872 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3873 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3874 3875 case AMDGPU::V_ASHRREV_I32_e32: 3876 case AMDGPU::V_ASHRREV_I32_e64: 3877 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3878 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3879 case AMDGPU::V_ASHRREV_I32_e32_vi: 3880 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3881 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3882 case AMDGPU::V_ASHRREV_I32_e64_vi: 3883 3884 case AMDGPU::V_LSHLREV_B32_e32: 3885 case AMDGPU::V_LSHLREV_B32_e64: 3886 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3887 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3888 case AMDGPU::V_LSHLREV_B32_e32_vi: 3889 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3890 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3891 case AMDGPU::V_LSHLREV_B32_e64_vi: 3892 3893 case AMDGPU::V_LSHLREV_B16_e32: 3894 case AMDGPU::V_LSHLREV_B16_e64: 3895 case AMDGPU::V_LSHLREV_B16_e32_vi: 3896 case AMDGPU::V_LSHLREV_B16_e64_vi: 3897 case AMDGPU::V_LSHLREV_B16_gfx10: 3898 3899 case AMDGPU::V_LSHRREV_B16_e32: 3900 case AMDGPU::V_LSHRREV_B16_e64: 3901 case AMDGPU::V_LSHRREV_B16_e32_vi: 3902 case AMDGPU::V_LSHRREV_B16_e64_vi: 3903 case AMDGPU::V_LSHRREV_B16_gfx10: 3904 3905 case AMDGPU::V_ASHRREV_I16_e32: 3906 case AMDGPU::V_ASHRREV_I16_e64: 3907 case AMDGPU::V_ASHRREV_I16_e32_vi: 3908 case AMDGPU::V_ASHRREV_I16_e64_vi: 3909 case AMDGPU::V_ASHRREV_I16_gfx10: 3910 3911 case AMDGPU::V_LSHLREV_B64_e64: 3912 case AMDGPU::V_LSHLREV_B64_gfx10: 3913 case AMDGPU::V_LSHLREV_B64_vi: 3914 3915 case AMDGPU::V_LSHRREV_B64_e64: 3916 case AMDGPU::V_LSHRREV_B64_gfx10: 3917 case AMDGPU::V_LSHRREV_B64_vi: 3918 3919 case AMDGPU::V_ASHRREV_I64_e64: 3920 case AMDGPU::V_ASHRREV_I64_gfx10: 3921 case AMDGPU::V_ASHRREV_I64_vi: 3922 3923 case AMDGPU::V_PK_LSHLREV_B16: 3924 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3925 case AMDGPU::V_PK_LSHLREV_B16_vi: 3926 3927 case AMDGPU::V_PK_LSHRREV_B16: 3928 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3929 case AMDGPU::V_PK_LSHRREV_B16_vi: 3930 case AMDGPU::V_PK_ASHRREV_I16: 3931 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3932 case AMDGPU::V_PK_ASHRREV_I16_vi: 3933 return true; 3934 default: 3935 return false; 3936 } 3937 } 3938 3939 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3940 3941 using namespace SIInstrFlags; 3942 const unsigned Opcode = Inst.getOpcode(); 3943 const MCInstrDesc &Desc = MII.get(Opcode); 3944 3945 // lds_direct register is defined so that it can be used 3946 // with 9-bit operands only. Ignore encodings which do not accept these. 
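  // Beyond the encoding check below, the loop rejects lds_direct on gfx90a,
  // with *rev opcodes, with SDWA forms, and in any source slot other than src0.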
3947 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3948 if ((Desc.TSFlags & Enc) == 0) 3949 return None; 3950 3951 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3952 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3953 if (SrcIdx == -1) 3954 break; 3955 const auto &Src = Inst.getOperand(SrcIdx); 3956 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3957 3958 if (isGFX90A()) 3959 return StringRef("lds_direct is not supported on this GPU"); 3960 3961 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3962 return StringRef("lds_direct cannot be used with this instruction"); 3963 3964 if (SrcName != OpName::src0) 3965 return StringRef("lds_direct may be used as src0 only"); 3966 } 3967 } 3968 3969 return None; 3970 } 3971 3972 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3973 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3975 if (Op.isFlatOffset()) 3976 return Op.getStartLoc(); 3977 } 3978 return getLoc(); 3979 } 3980 3981 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3982 const OperandVector &Operands) { 3983 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3984 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3985 return true; 3986 3987 auto Opcode = Inst.getOpcode(); 3988 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3989 assert(OpNum != -1); 3990 3991 const auto &Op = Inst.getOperand(OpNum); 3992 if (!hasFlatOffsets() && Op.getImm() != 0) { 3993 Error(getFlatOffsetLoc(Operands), 3994 "flat offset modifier is not supported on this GPU"); 3995 return false; 3996 } 3997 3998 // For FLAT segment the offset must be positive; 3999 // MSB is ignored and forced to zero. 4000 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4001 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4002 if (!isIntN(OffsetSize, Op.getImm())) { 4003 Error(getFlatOffsetLoc(Operands), 4004 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4005 return false; 4006 } 4007 } else { 4008 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4009 if (!isUIntN(OffsetSize, Op.getImm())) { 4010 Error(getFlatOffsetLoc(Operands), 4011 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4012 return false; 4013 } 4014 } 4015 4016 return true; 4017 } 4018 4019 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4020 // Start with second operand because SMEM Offset cannot be dst or src0. 
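  // (Operands[0] is the mnemonic token; parsed machine operands start at 1.)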
4021 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4022 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4023 if (Op.isSMEMOffset()) 4024 return Op.getStartLoc(); 4025 } 4026 return getLoc(); 4027 } 4028 4029 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4030 const OperandVector &Operands) { 4031 if (isCI() || isSI()) 4032 return true; 4033 4034 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4035 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4036 return true; 4037 4038 auto Opcode = Inst.getOpcode(); 4039 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4040 if (OpNum == -1) 4041 return true; 4042 4043 const auto &Op = Inst.getOperand(OpNum); 4044 if (!Op.isImm()) 4045 return true; 4046 4047 uint64_t Offset = Op.getImm(); 4048 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4049 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4050 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4051 return true; 4052 4053 Error(getSMEMOffsetLoc(Operands), 4054 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4055 "expected a 21-bit signed offset"); 4056 4057 return false; 4058 } 4059 4060 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4061 unsigned Opcode = Inst.getOpcode(); 4062 const MCInstrDesc &Desc = MII.get(Opcode); 4063 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4064 return true; 4065 4066 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4067 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4068 4069 const int OpIndices[] = { Src0Idx, Src1Idx }; 4070 4071 unsigned NumExprs = 0; 4072 unsigned NumLiterals = 0; 4073 uint32_t LiteralValue; 4074 4075 for (int OpIdx : OpIndices) { 4076 if (OpIdx == -1) break; 4077 4078 const MCOperand &MO = Inst.getOperand(OpIdx); 4079 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4080 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4081 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4082 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4083 if (NumLiterals == 0 || LiteralValue != Value) { 4084 LiteralValue = Value; 4085 ++NumLiterals; 4086 } 4087 } else if (MO.isExpr()) { 4088 ++NumExprs; 4089 } 4090 } 4091 } 4092 4093 return NumLiterals + NumExprs <= 1; 4094 } 4095 4096 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4097 const unsigned Opc = Inst.getOpcode(); 4098 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4099 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4100 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4101 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4102 4103 if (OpSel & ~3) 4104 return false; 4105 } 4106 4107 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4108 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4109 if (OpSelIdx != -1) { 4110 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4111 return false; 4112 } 4113 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4114 if (OpSelHiIdx != -1) { 4115 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4116 return false; 4117 } 4118 } 4119 4120 return true; 4121 } 4122 4123 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4124 const OperandVector &Operands) { 4125 const unsigned Opc = Inst.getOpcode(); 4126 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4127 if (DppCtrlIdx < 0) 4128 return true; 4129 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4130 4131 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4132 // DPP64 is supported for row_newbcast only. 4133 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4134 if (Src0Idx >= 0 && 4135 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4136 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4137 Error(S, "64 bit dpp only supports row_newbcast"); 4138 return false; 4139 } 4140 } 4141 4142 return true; 4143 } 4144 4145 // Check if VCC register matches wavefront size 4146 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4147 auto FB = getFeatureBits(); 4148 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4149 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4150 } 4151 4152 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4153 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4154 const OperandVector &Operands) { 4155 unsigned Opcode = Inst.getOpcode(); 4156 const MCInstrDesc &Desc = MII.get(Opcode); 4157 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4158 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4159 ImmIdx == -1) 4160 return true; 4161 4162 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4163 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4164 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4165 4166 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4167 4168 unsigned NumExprs = 0; 4169 unsigned NumLiterals = 0; 4170 uint32_t LiteralValue; 4171 4172 for (int OpIdx : OpIndices) { 4173 if (OpIdx == -1) 4174 continue; 4175 4176 const MCOperand &MO = Inst.getOperand(OpIdx); 4177 if (!MO.isImm() && !MO.isExpr()) 4178 continue; 4179 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4180 continue; 4181 4182 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4183 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4184 Error(getConstLoc(Operands), 4185 "inline constants are not allowed for this operand"); 4186 return false; 4187 } 4188 4189 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4190 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4191 if (NumLiterals == 0 || LiteralValue != Value) { 4192 LiteralValue = Value; 4193 ++NumLiterals; 4194 } 4195 } else if (MO.isExpr()) { 4196 ++NumExprs; 4197 } 4198 } 4199 NumLiterals += NumExprs; 4200 4201 if (!NumLiterals) 4202 return true; 4203 4204 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4205 Error(getLitLoc(Operands), "literal operands are not supported"); 4206 return false; 4207 } 4208 4209 if (NumLiterals > 1) { 4210 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4211 return false; 4212 } 4213 4214 return true; 4215 } 4216 4217 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4218 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4219 const MCRegisterInfo *MRI) { 4220 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4221 if (OpIdx < 0) 4222 return -1; 4223 4224 const MCOperand &Op = Inst.getOperand(OpIdx); 4225 if (!Op.isReg()) 4226 return -1; 4227 4228 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4229 auto Reg = Sub ? Sub : Op.getReg(); 4230 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4231 return AGPR32.contains(Reg) ? 
1 : 0; 4232 } 4233 4234 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4235 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4236 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4237 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4238 SIInstrFlags::DS)) == 0) 4239 return true; 4240 4241 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4242 : AMDGPU::OpName::vdata; 4243 4244 const MCRegisterInfo *MRI = getMRI(); 4245 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4246 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4247 4248 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4249 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4250 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4251 return false; 4252 } 4253 4254 auto FB = getFeatureBits(); 4255 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4256 if (DataAreg < 0 || DstAreg < 0) 4257 return true; 4258 return DstAreg == DataAreg; 4259 } 4260 4261 return DstAreg < 1 && DataAreg < 1; 4262 } 4263 4264 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4265 auto FB = getFeatureBits(); 4266 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4267 return true; 4268 4269 const MCRegisterInfo *MRI = getMRI(); 4270 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4271 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4272 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4273 const MCOperand &Op = Inst.getOperand(I); 4274 if (!Op.isReg()) 4275 continue; 4276 4277 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4278 if (!Sub) 4279 continue; 4280 4281 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4282 return false; 4283 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4284 return false; 4285 } 4286 4287 return true; 4288 } 4289 4290 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4291 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4293 if (Op.isBLGP()) 4294 return Op.getStartLoc(); 4295 } 4296 return SMLoc(); 4297 } 4298 4299 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4300 const OperandVector &Operands) { 4301 unsigned Opc = Inst.getOpcode(); 4302 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4303 if (BlgpIdx == -1) 4304 return true; 4305 SMLoc BLGPLoc = getBLGPLoc(Operands); 4306 if (!BLGPLoc.isValid()) 4307 return true; 4308 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4309 auto FB = getFeatureBits(); 4310 bool UsesNeg = false; 4311 if (FB[AMDGPU::FeatureGFX940Insts]) { 4312 switch (Opc) { 4313 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4314 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4315 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4316 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4317 UsesNeg = true; 4318 } 4319 } 4320 4321 if (IsNeg == UsesNeg) 4322 return true; 4323 4324 Error(BLGPLoc, 4325 UsesNeg ? "invalid modifier: blgp is not supported" 4326 : "invalid modifier: neg is not supported"); 4327 4328 return false; 4329 } 4330 4331 // gfx90a has an undocumented limitation: 4332 // DS_GWS opcodes must use even aligned registers. 
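// For example, on gfx90a a ds_gws_init whose data operand is v1 (an odd
// register index) is rejected below, while v0 or v2 is accepted.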
4333 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4334 const OperandVector &Operands) { 4335 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4336 return true; 4337 4338 int Opc = Inst.getOpcode(); 4339 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4340 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4341 return true; 4342 4343 const MCRegisterInfo *MRI = getMRI(); 4344 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4345 int Data0Pos = 4346 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4347 assert(Data0Pos != -1); 4348 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4349 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4350 if (RegIdx & 1) { 4351 SMLoc RegLoc = getRegLoc(Reg, Operands); 4352 Error(RegLoc, "vgpr must be even aligned"); 4353 return false; 4354 } 4355 4356 return true; 4357 } 4358 4359 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4360 const OperandVector &Operands, 4361 const SMLoc &IDLoc) { 4362 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4363 AMDGPU::OpName::cpol); 4364 if (CPolPos == -1) 4365 return true; 4366 4367 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4368 4369 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4370 if ((TSFlags & (SIInstrFlags::SMRD)) && 4371 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4372 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4373 return false; 4374 } 4375 4376 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4377 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4378 StringRef CStr(S.getPointer()); 4379 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4380 Error(S, "scc is not supported on this GPU"); 4381 return false; 4382 } 4383 4384 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4385 return true; 4386 4387 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4388 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4389 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4390 : "instruction must use glc"); 4391 return false; 4392 } 4393 } else { 4394 if (CPol & CPol::GLC) { 4395 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4396 StringRef CStr(S.getPointer()); 4397 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4398 Error(S, isGFX940() ? 
"instruction must not use sc0" 4399 : "instruction must not use glc"); 4400 return false; 4401 } 4402 } 4403 4404 return true; 4405 } 4406 4407 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4408 const SMLoc &IDLoc, 4409 const OperandVector &Operands) { 4410 if (auto ErrMsg = validateLdsDirect(Inst)) { 4411 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4412 return false; 4413 } 4414 if (!validateSOPLiteral(Inst)) { 4415 Error(getLitLoc(Operands), 4416 "only one literal operand is allowed"); 4417 return false; 4418 } 4419 if (!validateVOPLiteral(Inst, Operands)) { 4420 return false; 4421 } 4422 if (!validateConstantBusLimitations(Inst, Operands)) { 4423 return false; 4424 } 4425 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4426 return false; 4427 } 4428 if (!validateIntClampSupported(Inst)) { 4429 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4430 "integer clamping is not supported on this GPU"); 4431 return false; 4432 } 4433 if (!validateOpSel(Inst)) { 4434 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4435 "invalid op_sel operand"); 4436 return false; 4437 } 4438 if (!validateDPP(Inst, Operands)) { 4439 return false; 4440 } 4441 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4442 if (!validateMIMGD16(Inst)) { 4443 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4444 "d16 modifier is not supported on this GPU"); 4445 return false; 4446 } 4447 if (!validateMIMGDim(Inst)) { 4448 Error(IDLoc, "dim modifier is required on this GPU"); 4449 return false; 4450 } 4451 if (!validateMIMGMSAA(Inst)) { 4452 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4453 "invalid dim; must be MSAA type"); 4454 return false; 4455 } 4456 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4457 Error(IDLoc, *ErrMsg); 4458 return false; 4459 } 4460 if (!validateMIMGAddrSize(Inst)) { 4461 Error(IDLoc, 4462 "image address size does not match dim and a16"); 4463 return false; 4464 } 4465 if (!validateMIMGAtomicDMask(Inst)) { 4466 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4467 "invalid atomic image dmask"); 4468 return false; 4469 } 4470 if (!validateMIMGGatherDMask(Inst)) { 4471 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4472 "invalid image_gather dmask: only one bit must be set"); 4473 return false; 4474 } 4475 if (!validateMovrels(Inst, Operands)) { 4476 return false; 4477 } 4478 if (!validateFlatOffset(Inst, Operands)) { 4479 return false; 4480 } 4481 if (!validateSMEMOffset(Inst, Operands)) { 4482 return false; 4483 } 4484 if (!validateMAIAccWrite(Inst, Operands)) { 4485 return false; 4486 } 4487 if (!validateMFMA(Inst, Operands)) { 4488 return false; 4489 } 4490 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4491 return false; 4492 } 4493 4494 if (!validateAGPRLdSt(Inst)) { 4495 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4496 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4497 : "invalid register class: agpr loads and stores not supported on this GPU" 4498 ); 4499 return false; 4500 } 4501 if (!validateVGPRAlign(Inst)) { 4502 Error(IDLoc, 4503 "invalid register class: vgpr tuples must be 64 bit aligned"); 4504 return false; 4505 } 4506 if (!validateGWS(Inst, Operands)) { 4507 return false; 4508 } 4509 4510 if (!validateBLGP(Inst, Operands)) { 4511 return false; 4512 } 4513 4514 if (!validateDivScale(Inst)) { 4515 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4516 return false; 4517 } 4518 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4519 return false; 4520 } 4521 4522 return true; 4523 } 4524 4525 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4526 const FeatureBitset &FBS, 4527 unsigned VariantID = 0); 4528 4529 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4530 const FeatureBitset &AvailableFeatures, 4531 unsigned VariantID); 4532 4533 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4534 const FeatureBitset &FBS) { 4535 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4536 } 4537 4538 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4539 const FeatureBitset &FBS, 4540 ArrayRef<unsigned> Variants) { 4541 for (auto Variant : Variants) { 4542 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4543 return true; 4544 } 4545 4546 return false; 4547 } 4548 4549 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4550 const SMLoc &IDLoc) { 4551 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4552 4553 // Check if requested instruction variant is supported. 4554 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4555 return false; 4556 4557 // This instruction is not supported. 4558 // Clear any other pending errors because they are no longer relevant. 4559 getParser().clearPendingErrors(); 4560 4561 // Requested instruction variant is not supported. 4562 // Check if any other variants are supported. 4563 StringRef VariantName = getMatchedVariantName(); 4564 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4565 return Error(IDLoc, 4566 Twine(VariantName, 4567 " variant of this instruction is not supported")); 4568 } 4569 4570 // Finally check if this instruction is supported on any other GPU. 4571 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4572 return Error(IDLoc, "instruction not supported on this GPU"); 4573 } 4574 4575 // Instruction not supported on any GPU. Probably a typo. 4576 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4577 return Error(IDLoc, "invalid instruction" + Suggestion); 4578 } 4579 4580 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4581 OperandVector &Operands, 4582 MCStreamer &Out, 4583 uint64_t &ErrorInfo, 4584 bool MatchingInlineAsm) { 4585 MCInst Inst; 4586 unsigned Result = Match_Success; 4587 for (auto Variant : getMatchedVariants()) { 4588 uint64_t EI; 4589 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4590 Variant); 4591 // We order match statuses from least to most specific. 
We use the most specific 4592 // status as the result: 4593 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4594 if ((R == Match_Success) || 4595 (R == Match_PreferE32) || 4596 (R == Match_MissingFeature && Result != Match_PreferE32) || 4597 (R == Match_InvalidOperand && Result != Match_MissingFeature 4598 && Result != Match_PreferE32) || 4599 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4600 && Result != Match_MissingFeature 4601 && Result != Match_PreferE32)) { 4602 Result = R; 4603 ErrorInfo = EI; 4604 } 4605 if (R == Match_Success) 4606 break; 4607 } 4608 4609 if (Result == Match_Success) { 4610 if (!validateInstruction(Inst, IDLoc, Operands)) { 4611 return true; 4612 } 4613 Inst.setLoc(IDLoc); 4614 Out.emitInstruction(Inst, getSTI()); 4615 return false; 4616 } 4617 4618 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4619 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4620 return true; 4621 } 4622 4623 switch (Result) { 4624 default: break; 4625 case Match_MissingFeature: 4626 // It has been verified that the specified instruction 4627 // mnemonic is valid. A match was found but it requires 4628 // features which are not supported on this GPU. 4629 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4630 4631 case Match_InvalidOperand: { 4632 SMLoc ErrorLoc = IDLoc; 4633 if (ErrorInfo != ~0ULL) { 4634 if (ErrorInfo >= Operands.size()) { 4635 return Error(IDLoc, "too few operands for instruction"); 4636 } 4637 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4638 if (ErrorLoc == SMLoc()) 4639 ErrorLoc = IDLoc; 4640 } 4641 return Error(ErrorLoc, "invalid operand for instruction"); 4642 } 4643 4644 case Match_PreferE32: 4645 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4646 "should be encoded as e32"); 4647 case Match_MnemonicFail: 4648 llvm_unreachable("Invalid instructions should have been handled already"); 4649 } 4650 llvm_unreachable("Implement any new match types added!"); 4651 } 4652 4653 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4654 int64_t Tmp = -1; 4655 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4656 return true; 4657 } 4658 if (getParser().parseAbsoluteExpression(Tmp)) { 4659 return true; 4660 } 4661 Ret = static_cast<uint32_t>(Tmp); 4662 return false; 4663 } 4664 4665 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4666 uint32_t &Minor) { 4667 if (ParseAsAbsoluteExpression(Major)) 4668 return TokError("invalid major version"); 4669 4670 if (!trySkipToken(AsmToken::Comma)) 4671 return TokError("minor version number required, comma expected"); 4672 4673 if (ParseAsAbsoluteExpression(Minor)) 4674 return TokError("invalid minor version"); 4675 4676 return false; 4677 } 4678 4679 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4680 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4681 return TokError("directive only supported for amdgcn architecture"); 4682 4683 std::string TargetIDDirective; 4684 SMLoc TargetStart = getTok().getLoc(); 4685 if (getParser().parseEscapedString(TargetIDDirective)) 4686 return true; 4687 4688 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4689 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4690 return getParser().Error(TargetRange.Start, 4691 (Twine(".amdgcn_target directive's target id ") + 4692 Twine(TargetIDDirective) + 4693 Twine(" does not match the specified target id ") + 4694
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4695 4696 return false; 4697 } 4698 4699 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4700 return Error(Range.Start, "value out of range", Range); 4701 } 4702 4703 bool AMDGPUAsmParser::calculateGPRBlocks( 4704 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4705 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4706 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4707 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4708 // TODO(scott.linder): These calculations are duplicated from 4709 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4710 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4711 4712 unsigned NumVGPRs = NextFreeVGPR; 4713 unsigned NumSGPRs = NextFreeSGPR; 4714 4715 if (Version.Major >= 10) 4716 NumSGPRs = 0; 4717 else { 4718 unsigned MaxAddressableNumSGPRs = 4719 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4720 4721 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4722 NumSGPRs > MaxAddressableNumSGPRs) 4723 return OutOfRangeError(SGPRRange); 4724 4725 NumSGPRs += 4726 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4727 4728 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4729 NumSGPRs > MaxAddressableNumSGPRs) 4730 return OutOfRangeError(SGPRRange); 4731 4732 if (Features.test(FeatureSGPRInitBug)) 4733 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4734 } 4735 4736 VGPRBlocks = 4737 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4738 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4739 4740 return false; 4741 } 4742 4743 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4744 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4745 return TokError("directive only supported for amdgcn architecture"); 4746 4747 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4748 return TokError("directive only supported for amdhsa OS"); 4749 4750 StringRef KernelName; 4751 if (getParser().parseIdentifier(KernelName)) 4752 return true; 4753 4754 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4755 4756 StringSet<> Seen; 4757 4758 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4759 4760 SMRange VGPRRange; 4761 uint64_t NextFreeVGPR = 0; 4762 uint64_t AccumOffset = 0; 4763 uint64_t SharedVGPRCount = 0; 4764 SMRange SGPRRange; 4765 uint64_t NextFreeSGPR = 0; 4766 4767 // Count the number of user SGPRs implied from the enabled feature bits. 4768 unsigned ImpliedUserSGPRCount = 0; 4769 4770 // Track if the asm explicitly contains the directive for the user SGPR 4771 // count. 
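  // An explicit .amdhsa_user_sgpr_count may not be smaller than the count
  // implied by the enabled user SGPRs; this is checked after the directive
  // loop.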
4772 Optional<unsigned> ExplicitUserSGPRCount; 4773 bool ReserveVCC = true; 4774 bool ReserveFlatScr = true; 4775 Optional<bool> EnableWavefrontSize32; 4776 4777 while (true) { 4778 while (trySkipToken(AsmToken::EndOfStatement)); 4779 4780 StringRef ID; 4781 SMRange IDRange = getTok().getLocRange(); 4782 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4783 return true; 4784 4785 if (ID == ".end_amdhsa_kernel") 4786 break; 4787 4788 if (Seen.find(ID) != Seen.end()) 4789 return TokError(".amdhsa_ directives cannot be repeated"); 4790 Seen.insert(ID); 4791 4792 SMLoc ValStart = getLoc(); 4793 int64_t IVal; 4794 if (getParser().parseAbsoluteExpression(IVal)) 4795 return true; 4796 SMLoc ValEnd = getLoc(); 4797 SMRange ValRange = SMRange(ValStart, ValEnd); 4798 4799 if (IVal < 0) 4800 return OutOfRangeError(ValRange); 4801 4802 uint64_t Val = IVal; 4803 4804 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4805 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4806 return OutOfRangeError(RANGE); \ 4807 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4808 4809 if (ID == ".amdhsa_group_segment_fixed_size") { 4810 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4811 return OutOfRangeError(ValRange); 4812 KD.group_segment_fixed_size = Val; 4813 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4814 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4815 return OutOfRangeError(ValRange); 4816 KD.private_segment_fixed_size = Val; 4817 } else if (ID == ".amdhsa_kernarg_size") { 4818 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4819 return OutOfRangeError(ValRange); 4820 KD.kernarg_size = Val; 4821 } else if (ID == ".amdhsa_user_sgpr_count") { 4822 ExplicitUserSGPRCount = Val; 4823 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4824 if (hasArchitectedFlatScratch()) 4825 return Error(IDRange.Start, 4826 "directive is not supported with architected flat scratch", 4827 IDRange); 4828 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4829 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4830 Val, ValRange); 4831 if (Val) 4832 ImpliedUserSGPRCount += 4; 4833 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4834 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4835 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4836 ValRange); 4837 if (Val) 4838 ImpliedUserSGPRCount += 2; 4839 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4840 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4841 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4842 ValRange); 4843 if (Val) 4844 ImpliedUserSGPRCount += 2; 4845 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4846 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4847 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4848 Val, ValRange); 4849 if (Val) 4850 ImpliedUserSGPRCount += 2; 4851 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4852 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4853 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4854 ValRange); 4855 if (Val) 4856 ImpliedUserSGPRCount += 2; 4857 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4858 if (hasArchitectedFlatScratch()) 4859 return Error(IDRange.Start, 4860 "directive is not supported with architected flat scratch", 4861 IDRange); 4862 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4863 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4864 ValRange); 4865 if (Val) 4866 ImpliedUserSGPRCount += 2; 4867 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4868 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4869 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4870 Val, ValRange); 4871 if (Val) 4872 ImpliedUserSGPRCount += 1; 4873 } else if (ID == ".amdhsa_wavefront_size32") { 4874 if (IVersion.Major < 10) 4875 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4876 EnableWavefrontSize32 = Val; 4877 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4878 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4879 Val, ValRange); 4880 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4881 if (hasArchitectedFlatScratch()) 4882 return Error(IDRange.Start, 4883 "directive is not supported with architected flat scratch", 4884 IDRange); 4885 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4886 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4887 } else if (ID == ".amdhsa_enable_private_segment") { 4888 if (!hasArchitectedFlatScratch()) 4889 return Error( 4890 IDRange.Start, 4891 "directive is not supported without architected flat scratch", 4892 IDRange); 4893 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4894 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4895 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4896 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4897 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4898 ValRange); 4899 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4900 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4901 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4902 ValRange); 4903 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4904 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4905 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4906 ValRange); 4907 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4908 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4909 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4910 ValRange); 4911 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4912 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4913 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4914 ValRange); 4915 } else if (ID == ".amdhsa_next_free_vgpr") { 4916 VGPRRange = ValRange; 4917 NextFreeVGPR = Val; 4918 } else if (ID == ".amdhsa_next_free_sgpr") { 4919 SGPRRange = ValRange; 4920 NextFreeSGPR = Val; 4921 } else if (ID == ".amdhsa_accum_offset") { 4922 if (!isGFX90A()) 4923 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4924 AccumOffset = Val; 4925 } else if (ID == ".amdhsa_reserve_vcc") { 4926 if (!isUInt<1>(Val)) 4927 return OutOfRangeError(ValRange); 4928 ReserveVCC = Val; 4929 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4930 if (IVersion.Major < 7) 4931 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4932 if (hasArchitectedFlatScratch()) 4933 return Error(IDRange.Start, 4934 "directive is not supported with architected flat scratch", 4935 IDRange); 4936 if (!isUInt<1>(Val)) 4937 return OutOfRangeError(ValRange); 4938 ReserveFlatScr = Val; 4939 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4940 if (IVersion.Major < 8) 4941 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4942 if (!isUInt<1>(Val)) 4943 return OutOfRangeError(ValRange); 4944 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4945 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4946 IDRange); 4947 } else if (ID == ".amdhsa_float_round_mode_32") { 4948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4949 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4950 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4951 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4952 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4953 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4954 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4955 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4956 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4957 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4958 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4959 ValRange); 4960 } else if (ID == ".amdhsa_dx10_clamp") { 4961 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4962 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4963 } else if (ID == ".amdhsa_ieee_mode") { 4964 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4965 Val, ValRange); 4966 } else if (ID == ".amdhsa_fp16_overflow") { 4967 if (IVersion.Major < 9) 4968 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4969 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4970 ValRange); 4971 } else if (ID == ".amdhsa_tg_split") { 4972 if (!isGFX90A()) 4973 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4974 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4975 ValRange); 4976 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4977 if (IVersion.Major < 10) 4978 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4979 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4980 ValRange); 4981 } else if (ID == ".amdhsa_memory_ordered") { 4982 if (IVersion.Major < 10) 4983 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4984 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4985 ValRange); 4986 } else if (ID == ".amdhsa_forward_progress") { 4987 if (IVersion.Major < 10) 4988 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4989 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4990 ValRange); 4991 } else if (ID == ".amdhsa_shared_vgpr_count") { 4992 if (IVersion.Major < 10) 4993 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4994 SharedVGPRCount = Val; 4995 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4996 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4997 ValRange); 4998 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4999 PARSE_BITS_ENTRY( 5000 KD.compute_pgm_rsrc2, 5001 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5002 ValRange); 5003 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5004 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5005 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5006 Val, ValRange); 5007 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5008 PARSE_BITS_ENTRY( 5009 KD.compute_pgm_rsrc2, 5010 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5011 ValRange); 5012 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5014 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5015 Val, ValRange); 5016 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5017 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5018 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5019 Val, ValRange); 5020 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5021 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5022 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5023 Val, ValRange); 5024 } else if (ID == ".amdhsa_exception_int_div_zero") { 5025 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5026 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5027 Val, ValRange); 5028 } else { 5029 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5030 } 5031 5032 #undef PARSE_BITS_ENTRY 5033 } 5034 5035 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5036 return TokError(".amdhsa_next_free_vgpr directive is required"); 5037 5038 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5039 return TokError(".amdhsa_next_free_sgpr directive is required"); 5040 5041 unsigned VGPRBlocks; 5042 unsigned SGPRBlocks; 5043 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5044 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5045 EnableWavefrontSize32, NextFreeVGPR, 5046 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5047 SGPRBlocks)) 5048 return true; 5049 5050 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5051 VGPRBlocks)) 5052 return OutOfRangeError(VGPRRange); 5053 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5054 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5055 5056 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5057 SGPRBlocks)) 5058 return OutOfRangeError(SGPRRange); 5059 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5060 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5061 SGPRBlocks); 5062 5063 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5064 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5065 "enabled user SGPRs"); 5066 5067 unsigned UserSGPRCount = 5068 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5069 5070 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5071 return TokError("too many user SGPRs enabled"); 5072 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5073 UserSGPRCount); 5074 5075 if (isGFX90A()) { 5076 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5077 return TokError(".amdhsa_accum_offset directive is required"); 5078 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5079 return TokError("accum_offset should be in range [4..256] in " 5080 "increments of 4"); 5081 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5082 return TokError("accum_offset exceeds total VGPR allocation"); 5083 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5084 (AccumOffset / 4 - 1)); 5085 } 5086 5087 if (IVersion.Major == 10) { 5088 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5089 if (SharedVGPRCount && EnableWavefrontSize32) { 5090 return TokError("shared_vgpr_count directive not valid on " 5091 "wavefront size 32"); 5092 } 5093 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5094 return TokError("shared_vgpr_count*2 + " 5095 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5096 "exceed 63\n"); 5097 } 5098 } 5099 5100 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5101 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5102 ReserveFlatScr); 5103 return false; 5104 } 5105 5106 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5107 uint32_t Major; 5108 uint32_t Minor; 5109 5110 if (ParseDirectiveMajorMinor(Major, Minor)) 5111 return true; 5112 5113 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5114 return false; 5115 } 5116 5117 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5118 uint32_t Major; 5119 uint32_t Minor; 5120 uint32_t Stepping; 5121 StringRef VendorName; 5122 StringRef ArchName; 5123 5124 // If this directive has no
arguments, then use the ISA version for the 5125 // targeted GPU. 5126 if (isToken(AsmToken::EndOfStatement)) { 5127 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5128 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5129 ISA.Stepping, 5130 "AMD", "AMDGPU"); 5131 return false; 5132 } 5133 5134 if (ParseDirectiveMajorMinor(Major, Minor)) 5135 return true; 5136 5137 if (!trySkipToken(AsmToken::Comma)) 5138 return TokError("stepping version number required, comma expected"); 5139 5140 if (ParseAsAbsoluteExpression(Stepping)) 5141 return TokError("invalid stepping version"); 5142 5143 if (!trySkipToken(AsmToken::Comma)) 5144 return TokError("vendor name required, comma expected"); 5145 5146 if (!parseString(VendorName, "invalid vendor name")) 5147 return true; 5148 5149 if (!trySkipToken(AsmToken::Comma)) 5150 return TokError("arch name required, comma expected"); 5151 5152 if (!parseString(ArchName, "invalid arch name")) 5153 return true; 5154 5155 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5156 VendorName, ArchName); 5157 return false; 5158 } 5159 5160 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5161 amd_kernel_code_t &Header) { 5162 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5163 // assembly for backwards compatibility. 5164 if (ID == "max_scratch_backing_memory_byte_size") { 5165 Parser.eatToEndOfStatement(); 5166 return false; 5167 } 5168 5169 SmallString<40> ErrStr; 5170 raw_svector_ostream Err(ErrStr); 5171 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5172 return TokError(Err.str()); 5173 } 5174 Lex(); 5175 5176 if (ID == "enable_wavefront_size32") { 5177 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5178 if (!isGFX10Plus()) 5179 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5180 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5181 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5182 } else { 5183 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5184 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5185 } 5186 } 5187 5188 if (ID == "wavefront_size") { 5189 if (Header.wavefront_size == 5) { 5190 if (!isGFX10Plus()) 5191 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5192 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5193 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5194 } else if (Header.wavefront_size == 6) { 5195 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5196 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5197 } 5198 } 5199 5200 if (ID == "enable_wgp_mode") { 5201 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5202 !isGFX10Plus()) 5203 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5204 } 5205 5206 if (ID == "enable_mem_ordered") { 5207 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5208 !isGFX10Plus()) 5209 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5210 } 5211 5212 if (ID == "enable_fwd_progress") { 5213 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5214 !isGFX10Plus()) 5215 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5216 } 5217 5218 return false; 5219 } 5220 5221 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5222 amd_kernel_code_t Header; 5223 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5224 5225 while (true) { 
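    // Each iteration consumes one field assignment via ParseAMDKernelCodeTValue
    // until the closing .end_amd_kernel_code_t is reached.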
5226 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5227 // will set the current token to EndOfStatement. 5228 while(trySkipToken(AsmToken::EndOfStatement)); 5229 5230 StringRef ID; 5231 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5232 return true; 5233 5234 if (ID == ".end_amd_kernel_code_t") 5235 break; 5236 5237 if (ParseAMDKernelCodeTValue(ID, Header)) 5238 return true; 5239 } 5240 5241 getTargetStreamer().EmitAMDKernelCodeT(Header); 5242 5243 return false; 5244 } 5245 5246 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5247 StringRef KernelName; 5248 if (!parseId(KernelName, "expected symbol name")) 5249 return true; 5250 5251 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5252 ELF::STT_AMDGPU_HSA_KERNEL); 5253 5254 KernelScope.initialize(getContext()); 5255 return false; 5256 } 5257 5258 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5259 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5260 return Error(getLoc(), 5261 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5262 "architectures"); 5263 } 5264 5265 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5266 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5267 return Error(getParser().getTok().getLoc(), "target id must match options"); 5268 5269 getTargetStreamer().EmitISAVersion(); 5270 Lex(); 5271 5272 return false; 5273 } 5274 5275 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5276 const char *AssemblerDirectiveBegin; 5277 const char *AssemblerDirectiveEnd; 5278 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5279 isHsaAbiVersion3AndAbove(&getSTI()) 5280 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5281 HSAMD::V3::AssemblerDirectiveEnd) 5282 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5283 HSAMD::AssemblerDirectiveEnd); 5284 5285 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5286 return Error(getLoc(), 5287 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5288 "not available on non-amdhsa OSes")).str()); 5289 } 5290 5291 std::string HSAMetadataString; 5292 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5293 HSAMetadataString)) 5294 return true; 5295 5296 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5297 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5298 return Error(getLoc(), "invalid HSA metadata"); 5299 } else { 5300 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5301 return Error(getLoc(), "invalid HSA metadata"); 5302 } 5303 5304 return false; 5305 } 5306 5307 /// Common code to parse out a block of text (typically YAML) between start and 5308 /// end directives. 
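/// For example, ParseDirectiveHSAMetadata and ParseDirectivePALMetadataBegin
/// both use this helper to collect the text of their metadata blocks verbatim
/// until the matching end directive is seen.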
5309 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5310 const char *AssemblerDirectiveEnd, 5311 std::string &CollectString) { 5312 5313 raw_string_ostream CollectStream(CollectString); 5314 5315 getLexer().setSkipSpace(false); 5316 5317 bool FoundEnd = false; 5318 while (!isToken(AsmToken::Eof)) { 5319 while (isToken(AsmToken::Space)) { 5320 CollectStream << getTokenStr(); 5321 Lex(); 5322 } 5323 5324 if (trySkipId(AssemblerDirectiveEnd)) { 5325 FoundEnd = true; 5326 break; 5327 } 5328 5329 CollectStream << Parser.parseStringToEndOfStatement() 5330 << getContext().getAsmInfo()->getSeparatorString(); 5331 5332 Parser.eatToEndOfStatement(); 5333 } 5334 5335 getLexer().setSkipSpace(true); 5336 5337 if (isToken(AsmToken::Eof) && !FoundEnd) { 5338 return TokError(Twine("expected directive ") + 5339 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5340 } 5341 5342 CollectStream.flush(); 5343 return false; 5344 } 5345 5346 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5347 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5348 std::string String; 5349 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5350 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5351 return true; 5352 5353 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5354 if (!PALMetadata->setFromString(String)) 5355 return Error(getLoc(), "invalid PAL metadata"); 5356 return false; 5357 } 5358 5359 /// Parse the assembler directive for old linear-format PAL metadata. 5360 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5361 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5362 return Error(getLoc(), 5363 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5364 "not available on non-amdpal OSes")).str()); 5365 } 5366 5367 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5368 PALMetadata->setLegacy(); 5369 for (;;) { 5370 uint32_t Key, Value; 5371 if (ParseAsAbsoluteExpression(Key)) { 5372 return TokError(Twine("invalid value in ") + 5373 Twine(PALMD::AssemblerDirective)); 5374 } 5375 if (!trySkipToken(AsmToken::Comma)) { 5376 return TokError(Twine("expected an even number of values in ") + 5377 Twine(PALMD::AssemblerDirective)); 5378 } 5379 if (ParseAsAbsoluteExpression(Value)) { 5380 return TokError(Twine("invalid value in ") + 5381 Twine(PALMD::AssemblerDirective)); 5382 } 5383 PALMetadata->setRegister(Key, Value); 5384 if (!trySkipToken(AsmToken::Comma)) 5385 break; 5386 } 5387 return false; 5388 } 5389 5390 /// ParseDirectiveAMDGPULDS 5391 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5392 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5393 if (getParser().checkForValidSection()) 5394 return true; 5395 5396 StringRef Name; 5397 SMLoc NameLoc = getLoc(); 5398 if (getParser().parseIdentifier(Name)) 5399 return TokError("expected identifier in directive"); 5400 5401 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5402 if (parseToken(AsmToken::Comma, "expected ','")) 5403 return true; 5404 5405 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5406 5407 int64_t Size; 5408 SMLoc SizeLoc = getLoc(); 5409 if (getParser().parseAbsoluteExpression(Size)) 5410 return true; 5411 if (Size < 0) 5412 return Error(SizeLoc, "size must be non-negative"); 5413 if (Size > LocalMemorySize) 5414 return Error(SizeLoc, "size is too large"); 5415 5416 int64_t Alignment = 4; 5417 if (trySkipToken(AsmToken::Comma)) { 5418 SMLoc AlignLoc = getLoc(); 5419 if 
(getParser().parseAbsoluteExpression(Alignment)) 5420 return true; 5421 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5422 return Error(AlignLoc, "alignment must be a power of two"); 5423 5424 // Alignment larger than the size of LDS is possible in theory, as long 5425 // as the linker manages to place the symbol at address 0, but we do want 5426 // to make sure the alignment fits nicely into a 32-bit integer. 5427 if (Alignment >= 1u << 31) 5428 return Error(AlignLoc, "alignment is too large"); 5429 } 5430 5431 if (parseToken(AsmToken::EndOfStatement, 5432 "unexpected token in '.amdgpu_lds' directive")) 5433 return true; 5434 5435 Symbol->redefineIfPossible(); 5436 if (!Symbol->isUndefined()) 5437 return Error(NameLoc, "invalid symbol redefinition"); 5438 5439 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5440 return false; 5441 } 5442 5443 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5444 StringRef IDVal = DirectiveID.getString(); 5445 5446 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5447 if (IDVal == ".amdhsa_kernel") 5448 return ParseDirectiveAMDHSAKernel(); 5449 5450 // TODO: Restructure/combine with PAL metadata directive. 5451 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5452 return ParseDirectiveHSAMetadata(); 5453 } else { 5454 if (IDVal == ".hsa_code_object_version") 5455 return ParseDirectiveHSACodeObjectVersion(); 5456 5457 if (IDVal == ".hsa_code_object_isa") 5458 return ParseDirectiveHSACodeObjectISA(); 5459 5460 if (IDVal == ".amd_kernel_code_t") 5461 return ParseDirectiveAMDKernelCodeT(); 5462 5463 if (IDVal == ".amdgpu_hsa_kernel") 5464 return ParseDirectiveAMDGPUHsaKernel(); 5465 5466 if (IDVal == ".amd_amdgpu_isa") 5467 return ParseDirectiveISAVersion(); 5468 5469 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5470 return ParseDirectiveHSAMetadata(); 5471 } 5472 5473 if (IDVal == ".amdgcn_target") 5474 return ParseDirectiveAMDGCNTarget(); 5475 5476 if (IDVal == ".amdgpu_lds") 5477 return ParseDirectiveAMDGPULDS(); 5478 5479 if (IDVal == PALMD::AssemblerDirectiveBegin) 5480 return ParseDirectivePALMetadataBegin(); 5481 5482 if (IDVal == PALMD::AssemblerDirective) 5483 return ParseDirectivePALMetadata(); 5484 5485 return true; 5486 } 5487 5488 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5489 unsigned RegNo) { 5490 5491 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5492 return isGFX9Plus(); 5493 5494 // GFX10 has 2 more SGPRs 104 and 105. 5495 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5496 return hasSGPR104_SGPR105(); 5497 5498 switch (RegNo) { 5499 case AMDGPU::SRC_SHARED_BASE: 5500 case AMDGPU::SRC_SHARED_LIMIT: 5501 case AMDGPU::SRC_PRIVATE_BASE: 5502 case AMDGPU::SRC_PRIVATE_LIMIT: 5503 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5504 return isGFX9Plus(); 5505 case AMDGPU::TBA: 5506 case AMDGPU::TBA_LO: 5507 case AMDGPU::TBA_HI: 5508 case AMDGPU::TMA: 5509 case AMDGPU::TMA_LO: 5510 case AMDGPU::TMA_HI: 5511 return !isGFX9Plus(); 5512 case AMDGPU::XNACK_MASK: 5513 case AMDGPU::XNACK_MASK_LO: 5514 case AMDGPU::XNACK_MASK_HI: 5515 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5516 case AMDGPU::SGPR_NULL: 5517 return isGFX10Plus(); 5518 default: 5519 break; 5520 } 5521 5522 if (isCI()) 5523 return true; 5524 5525 if (isSI() || isGFX10Plus()) { 5526 // No flat_scr on SI. 5527 // On GFX10 flat scratch is not a valid register operand and can only be 5528 // accessed with s_setreg/s_getreg.
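    // For example (illustrative only), on GFX10 the low half of flat scratch
    // would be read with something like
    //   s_getreg_b32 s0, hwreg(HW_REG_FLAT_SCR_LO)
    // rather than by naming flat_scratch_lo directly as an operand.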
5529 switch (RegNo) { 5530 case AMDGPU::FLAT_SCR: 5531 case AMDGPU::FLAT_SCR_LO: 5532 case AMDGPU::FLAT_SCR_HI: 5533 return false; 5534 default: 5535 return true; 5536 } 5537 } 5538 5539 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5540 // SI/CI have. 5541 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5542 return hasSGPR102_SGPR103(); 5543 5544 return true; 5545 } 5546 5547 OperandMatchResultTy 5548 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5549 OperandMode Mode) { 5550 // Try to parse with a custom parser 5551 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5552 5553 // If we successfully parsed the operand or if there was an error parsing, 5554 // we are done. 5555 // 5556 // If we are parsing after we reach EndOfStatement then this means we 5557 // are appending default values to the Operands list. This is only done 5558 // by a custom parser, so we shouldn't continue on to the generic parsing. 5559 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5560 isToken(AsmToken::EndOfStatement)) 5561 return ResTy; 5562 5563 SMLoc RBraceLoc; 5564 SMLoc LBraceLoc = getLoc(); 5565 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5566 unsigned Prefix = Operands.size(); 5567 5568 for (;;) { 5569 auto Loc = getLoc(); 5570 ResTy = parseReg(Operands); 5571 if (ResTy == MatchOperand_NoMatch) 5572 Error(Loc, "expected a register"); 5573 if (ResTy != MatchOperand_Success) 5574 return MatchOperand_ParseFail; 5575 5576 RBraceLoc = getLoc(); 5577 if (trySkipToken(AsmToken::RBrac)) 5578 break; 5579 5580 if (!skipToken(AsmToken::Comma, 5581 "expected a comma or a closing square bracket")) { 5582 return MatchOperand_ParseFail; 5583 } 5584 } 5585 5586 if (Operands.size() - Prefix > 1) { 5587 Operands.insert(Operands.begin() + Prefix, 5588 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5589 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5590 } 5591 5592 return MatchOperand_Success; 5593 } 5594 5595 return parseRegOrImm(Operands); 5596 } 5597 5598 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5599 // Clear any forced encodings from the previous instruction.
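  // Illustrative examples of the suffix handling below:
  //   "v_add_f32_e64"  -> "v_add_f32", forced 64-bit encoding
  //   "v_add_f32_e32"  -> "v_add_f32", forced 32-bit encoding
  //   "v_mov_b32_dpp"  -> "v_mov_b32", forced DPP form
  //   "v_mov_b32_sdwa" -> "v_mov_b32", forced SDWA form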
5600 setForcedEncodingSize(0); 5601 setForcedDPP(false); 5602 setForcedSDWA(false); 5603 5604 if (Name.endswith("_e64")) { 5605 setForcedEncodingSize(64); 5606 return Name.substr(0, Name.size() - 4); 5607 } else if (Name.endswith("_e32")) { 5608 setForcedEncodingSize(32); 5609 return Name.substr(0, Name.size() - 4); 5610 } else if (Name.endswith("_dpp")) { 5611 setForcedDPP(true); 5612 return Name.substr(0, Name.size() - 4); 5613 } else if (Name.endswith("_sdwa")) { 5614 setForcedSDWA(true); 5615 return Name.substr(0, Name.size() - 5); 5616 } 5617 return Name; 5618 } 5619 5620 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5621 StringRef Name, 5622 SMLoc NameLoc, OperandVector &Operands) { 5623 // Add the instruction mnemonic 5624 Name = parseMnemonicSuffix(Name); 5625 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5626 5627 bool IsMIMG = Name.startswith("image_"); 5628 5629 while (!trySkipToken(AsmToken::EndOfStatement)) { 5630 OperandMode Mode = OperandMode_Default; 5631 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5632 Mode = OperandMode_NSA; 5633 CPolSeen = 0; 5634 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5635 5636 if (Res != MatchOperand_Success) { 5637 checkUnsupportedInstruction(Name, NameLoc); 5638 if (!Parser.hasPendingError()) { 5639 // FIXME: use real operand location rather than the current location. 5640 StringRef Msg = 5641 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5642 "not a valid operand."; 5643 Error(getLoc(), Msg); 5644 } 5645 while (!trySkipToken(AsmToken::EndOfStatement)) { 5646 lex(); 5647 } 5648 return true; 5649 } 5650 5651 // Eat the comma or space if there is one. 5652 trySkipToken(AsmToken::Comma); 5653 } 5654 5655 return false; 5656 } 5657 5658 //===----------------------------------------------------------------------===// 5659 // Utility functions 5660 //===----------------------------------------------------------------------===// 5661 5662 OperandMatchResultTy 5663 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5664 5665 if (!trySkipId(Prefix, AsmToken::Colon)) 5666 return MatchOperand_NoMatch; 5667 5668 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5669 } 5670 5671 OperandMatchResultTy 5672 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5673 AMDGPUOperand::ImmTy ImmTy, 5674 bool (*ConvertResult)(int64_t&)) { 5675 SMLoc S = getLoc(); 5676 int64_t Value = 0; 5677 5678 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5679 if (Res != MatchOperand_Success) 5680 return Res; 5681 5682 if (ConvertResult && !ConvertResult(Value)) { 5683 Error(S, "invalid " + StringRef(Prefix) + " value."); 5684 } 5685 5686 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5687 return MatchOperand_Success; 5688 } 5689 5690 OperandMatchResultTy 5691 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5692 OperandVector &Operands, 5693 AMDGPUOperand::ImmTy ImmTy, 5694 bool (*ConvertResult)(int64_t&)) { 5695 SMLoc S = getLoc(); 5696 if (!trySkipId(Prefix, AsmToken::Colon)) 5697 return MatchOperand_NoMatch; 5698 5699 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5700 return MatchOperand_ParseFail; 5701 5702 unsigned Val = 0; 5703 const unsigned MaxSize = 4; 5704 5705 // FIXME: How to verify the number of elements matches the number of src 5706 // operands? 
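  // Illustrative syntax accepted here (e.g. for op_sel-style modifiers):
  //   op_sel:[0,1,1,0]
  // Each element must be 0 or 1, at most 4 elements are allowed, and the
  // elements are packed into Val LSB-first.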
5707 for (int I = 0; ; ++I) { 5708 int64_t Op; 5709 SMLoc Loc = getLoc(); 5710 if (!parseExpr(Op)) 5711 return MatchOperand_ParseFail; 5712 5713 if (Op != 0 && Op != 1) { 5714 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5715 return MatchOperand_ParseFail; 5716 } 5717 5718 Val |= (Op << I); 5719 5720 if (trySkipToken(AsmToken::RBrac)) 5721 break; 5722 5723 if (I + 1 == MaxSize) { 5724 Error(getLoc(), "expected a closing square bracket"); 5725 return MatchOperand_ParseFail; 5726 } 5727 5728 if (!skipToken(AsmToken::Comma, "expected a comma")) 5729 return MatchOperand_ParseFail; 5730 } 5731 5732 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5733 return MatchOperand_Success; 5734 } 5735 5736 OperandMatchResultTy 5737 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5738 AMDGPUOperand::ImmTy ImmTy) { 5739 int64_t Bit; 5740 SMLoc S = getLoc(); 5741 5742 if (trySkipId(Name)) { 5743 Bit = 1; 5744 } else if (trySkipId("no", Name)) { 5745 Bit = 0; 5746 } else { 5747 return MatchOperand_NoMatch; 5748 } 5749 5750 if (Name == "r128" && !hasMIMG_R128()) { 5751 Error(S, "r128 modifier is not supported on this GPU"); 5752 return MatchOperand_ParseFail; 5753 } 5754 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5755 Error(S, "a16 modifier is not supported on this GPU"); 5756 return MatchOperand_ParseFail; 5757 } 5758 5759 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5760 ImmTy = AMDGPUOperand::ImmTyR128A16; 5761 5762 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5763 return MatchOperand_Success; 5764 } 5765 5766 OperandMatchResultTy 5767 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5768 unsigned CPolOn = 0; 5769 unsigned CPolOff = 0; 5770 SMLoc S = getLoc(); 5771 5772 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5773 if (isGFX940() && !Mnemo.startswith("s_")) { 5774 if (trySkipId("sc0")) 5775 CPolOn = AMDGPU::CPol::SC0; 5776 else if (trySkipId("nosc0")) 5777 CPolOff = AMDGPU::CPol::SC0; 5778 else if (trySkipId("nt")) 5779 CPolOn = AMDGPU::CPol::NT; 5780 else if (trySkipId("nont")) 5781 CPolOff = AMDGPU::CPol::NT; 5782 else if (trySkipId("sc1")) 5783 CPolOn = AMDGPU::CPol::SC1; 5784 else if (trySkipId("nosc1")) 5785 CPolOff = AMDGPU::CPol::SC1; 5786 else 5787 return MatchOperand_NoMatch; 5788 } 5789 else if (trySkipId("glc")) 5790 CPolOn = AMDGPU::CPol::GLC; 5791 else if (trySkipId("noglc")) 5792 CPolOff = AMDGPU::CPol::GLC; 5793 else if (trySkipId("slc")) 5794 CPolOn = AMDGPU::CPol::SLC; 5795 else if (trySkipId("noslc")) 5796 CPolOff = AMDGPU::CPol::SLC; 5797 else if (trySkipId("dlc")) 5798 CPolOn = AMDGPU::CPol::DLC; 5799 else if (trySkipId("nodlc")) 5800 CPolOff = AMDGPU::CPol::DLC; 5801 else if (trySkipId("scc")) 5802 CPolOn = AMDGPU::CPol::SCC; 5803 else if (trySkipId("noscc")) 5804 CPolOff = AMDGPU::CPol::SCC; 5805 else 5806 return MatchOperand_NoMatch; 5807 5808 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5809 Error(S, "dlc modifier is not supported on this GPU"); 5810 return MatchOperand_ParseFail; 5811 } 5812 5813 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5814 Error(S, "scc modifier is not supported on this GPU"); 5815 return MatchOperand_ParseFail; 5816 } 5817 5818 if (CPolSeen & (CPolOn | CPolOff)) { 5819 Error(S, "duplicate cache policy modifier"); 5820 return MatchOperand_ParseFail; 5821 } 5822 5823 CPolSeen |= (CPolOn | CPolOff); 5824 5825 for (unsigned I = 1; I != Operands.size(); ++I) { 5826 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5827 if (Op.isCPol()) { 5828 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5829 return MatchOperand_Success; 5830 } 5831 } 5832 5833 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5834 AMDGPUOperand::ImmTyCPol)); 5835 5836 return MatchOperand_Success; 5837 } 5838 5839 static void addOptionalImmOperand( 5840 MCInst& Inst, const OperandVector& Operands, 5841 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5842 AMDGPUOperand::ImmTy ImmT, 5843 int64_t Default = 0) { 5844 auto i = OptionalIdx.find(ImmT); 5845 if (i != OptionalIdx.end()) { 5846 unsigned Idx = i->second; 5847 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5848 } else { 5849 Inst.addOperand(MCOperand::createImm(Default)); 5850 } 5851 } 5852 5853 OperandMatchResultTy 5854 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5855 StringRef &Value, 5856 SMLoc &StringLoc) { 5857 if (!trySkipId(Prefix, AsmToken::Colon)) 5858 return MatchOperand_NoMatch; 5859 5860 StringLoc = getLoc(); 5861 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5862 : MatchOperand_ParseFail; 5863 } 5864 5865 //===----------------------------------------------------------------------===// 5866 // MTBUF format 5867 //===----------------------------------------------------------------------===// 5868 5869 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5870 int64_t MaxVal, 5871 int64_t &Fmt) { 5872 int64_t Val; 5873 SMLoc Loc = getLoc(); 5874 5875 auto Res = parseIntWithPrefix(Pref, Val); 5876 if (Res == MatchOperand_ParseFail) 5877 return false; 5878 if (Res == MatchOperand_NoMatch) 5879 return true; 5880 5881 if (Val < 0 || Val > MaxVal) { 5882 Error(Loc, Twine("out of range ", StringRef(Pref))); 5883 return false; 5884 } 5885 5886 Fmt = Val; 5887 return true; 5888 } 5889 5890 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5891 // values to live in a joint format operand in the MCInst encoding. 5892 OperandMatchResultTy 5893 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5894 using namespace llvm::AMDGPU::MTBUFFormat; 5895 5896 int64_t Dfmt = DFMT_UNDEF; 5897 int64_t Nfmt = NFMT_UNDEF; 5898 5899 // dfmt and nfmt can appear in either order, and each is optional. 5900 for (int I = 0; I < 2; ++I) { 5901 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5902 return MatchOperand_ParseFail; 5903 5904 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5905 return MatchOperand_ParseFail; 5906 } 5907 // Skip optional comma between dfmt/nfmt 5908 // but guard against 2 commas following each other. 5909 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5910 !peekToken().is(AsmToken::Comma)) { 5911 trySkipToken(AsmToken::Comma); 5912 } 5913 } 5914 5915 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5916 return MatchOperand_NoMatch; 5917 5918 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5919 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5920 5921 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5922 return MatchOperand_Success; 5923 } 5924 5925 OperandMatchResultTy 5926 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5927 using namespace llvm::AMDGPU::MTBUFFormat; 5928 5929 int64_t Fmt = UFMT_UNDEF; 5930 5931 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5932 return MatchOperand_ParseFail; 5933 5934 if (Fmt == UFMT_UNDEF) 5935 return MatchOperand_NoMatch; 5936 5937 Format = Fmt; 5938 return MatchOperand_Success; 5939 } 5940 5941 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5942 int64_t &Nfmt, 5943 StringRef FormatStr, 5944 SMLoc Loc) { 5945 using namespace llvm::AMDGPU::MTBUFFormat; 5946 int64_t Format; 5947 5948 Format = getDfmt(FormatStr); 5949 if (Format != DFMT_UNDEF) { 5950 Dfmt = Format; 5951 return true; 5952 } 5953 5954 Format = getNfmt(FormatStr, getSTI()); 5955 if (Format != NFMT_UNDEF) { 5956 Nfmt = Format; 5957 return true; 5958 } 5959 5960 Error(Loc, "unsupported format"); 5961 return false; 5962 } 5963 5964 OperandMatchResultTy 5965 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5966 SMLoc FormatLoc, 5967 int64_t &Format) { 5968 using namespace llvm::AMDGPU::MTBUFFormat; 5969 5970 int64_t Dfmt = DFMT_UNDEF; 5971 int64_t Nfmt = NFMT_UNDEF; 5972 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5973 return MatchOperand_ParseFail; 5974 5975 if (trySkipToken(AsmToken::Comma)) { 5976 StringRef Str; 5977 SMLoc Loc = getLoc(); 5978 if (!parseId(Str, "expected a format string") || 5979 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5980 return MatchOperand_ParseFail; 5981 } 5982 if (Dfmt == DFMT_UNDEF) { 5983 Error(Loc, "duplicate numeric format"); 5984 return MatchOperand_ParseFail; 5985 } else if (Nfmt == NFMT_UNDEF) { 5986 Error(Loc, "duplicate data format"); 5987 return MatchOperand_ParseFail; 5988 } 5989 } 5990 5991 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5992 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5993 5994 if (isGFX10Plus()) { 5995 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5996 if (Ufmt == UFMT_UNDEF) { 5997 Error(FormatLoc, "unsupported format"); 5998 return MatchOperand_ParseFail; 5999 } 6000 Format = Ufmt; 6001 } else { 6002 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6003 } 6004 6005 return MatchOperand_Success; 6006 } 6007 6008 OperandMatchResultTy 6009 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6010 SMLoc Loc, 6011 int64_t &Format) { 6012 using namespace llvm::AMDGPU::MTBUFFormat; 6013 6014 auto Id = getUnifiedFormat(FormatStr); 6015 if (Id == UFMT_UNDEF) 6016 return MatchOperand_NoMatch; 6017 6018 if (!isGFX10Plus()) { 6019 Error(Loc, "unified format is not supported on this GPU"); 6020 return MatchOperand_ParseFail; 6021 } 6022 6023 Format = Id; 6024 return MatchOperand_Success; 6025 } 6026 6027 OperandMatchResultTy 6028 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6029 using namespace llvm::AMDGPU::MTBUFFormat; 6030 SMLoc Loc = getLoc(); 6031 6032 if (!parseExpr(Format)) 6033 return MatchOperand_ParseFail; 6034 if (!isValidFormatEncoding(Format, getSTI())) { 6035 Error(Loc, "out of range format"); 6036 return MatchOperand_ParseFail; 6037 } 6038 6039 return MatchOperand_Success; 6040 } 6041 6042 OperandMatchResultTy 6043 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6044 using namespace llvm::AMDGPU::MTBUFFormat; 6045 6046 if (!trySkipId("format", AsmToken::Colon)) 6047 return MatchOperand_NoMatch; 6048 6049 if (trySkipToken(AsmToken::LBrac)) { 6050 StringRef FormatStr; 6051 SMLoc Loc = getLoc(); 6052 if (!parseId(FormatStr, "expected a format string")) 6053 return MatchOperand_ParseFail; 6054 6055 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6056 if (Res == MatchOperand_NoMatch) 6057 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6058 if (Res != MatchOperand_Success) 6059 return Res; 6060 6061 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6062 return MatchOperand_ParseFail; 6063 6064 return MatchOperand_Success; 6065 } 6066 6067 return parseNumericFormat(Format); 6068 } 6069 6070 OperandMatchResultTy 6071 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6072 using namespace llvm::AMDGPU::MTBUFFormat; 6073 6074 int64_t Format = getDefaultFormatEncoding(getSTI()); 6075 OperandMatchResultTy Res; 6076 SMLoc Loc = getLoc(); 6077 6078 // Parse legacy format syntax. 6079 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6080 if (Res == MatchOperand_ParseFail) 6081 return Res; 6082 6083 bool FormatFound = (Res == MatchOperand_Success); 6084 6085 Operands.push_back( 6086 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6087 6088 if (FormatFound) 6089 trySkipToken(AsmToken::Comma); 6090 6091 if (isToken(AsmToken::EndOfStatement)) { 6092 // We are expecting an soffset operand, 6093 // but let matcher handle the error. 6094 return MatchOperand_Success; 6095 } 6096 6097 // Parse soffset. 
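  // Note that the format modifier may also follow soffset; e.g. (illustrative)
  //   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
  // is handled by the !FormatFound path below.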
6098 Res = parseRegOrImm(Operands); 6099 if (Res != MatchOperand_Success) 6100 return Res; 6101 6102 trySkipToken(AsmToken::Comma); 6103 6104 if (!FormatFound) { 6105 Res = parseSymbolicOrNumericFormat(Format); 6106 if (Res == MatchOperand_ParseFail) 6107 return Res; 6108 if (Res == MatchOperand_Success) { 6109 auto Size = Operands.size(); 6110 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6111 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6112 Op.setImm(Format); 6113 } 6114 return MatchOperand_Success; 6115 } 6116 6117 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6118 Error(getLoc(), "duplicate format"); 6119 return MatchOperand_ParseFail; 6120 } 6121 return MatchOperand_Success; 6122 } 6123 6124 //===----------------------------------------------------------------------===// 6125 // ds 6126 //===----------------------------------------------------------------------===// 6127 6128 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6129 const OperandVector &Operands) { 6130 OptionalImmIndexMap OptionalIdx; 6131 6132 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6133 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6134 6135 // Add the register arguments 6136 if (Op.isReg()) { 6137 Op.addRegOperands(Inst, 1); 6138 continue; 6139 } 6140 6141 // Handle optional arguments 6142 OptionalIdx[Op.getImmTy()] = i; 6143 } 6144 6145 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6146 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6147 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6148 6149 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6150 } 6151 6152 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6153 bool IsGdsHardcoded) { 6154 OptionalImmIndexMap OptionalIdx; 6155 6156 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6157 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6158 6159 // Add the register arguments 6160 if (Op.isReg()) { 6161 Op.addRegOperands(Inst, 1); 6162 continue; 6163 } 6164 6165 if (Op.isToken() && Op.getToken() == "gds") { 6166 IsGdsHardcoded = true; 6167 continue; 6168 } 6169 6170 // Handle optional arguments 6171 OptionalIdx[Op.getImmTy()] = i; 6172 } 6173 6174 AMDGPUOperand::ImmTy OffsetType = 6175 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6176 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6177 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6178 AMDGPUOperand::ImmTyOffset; 6179 6180 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6181 6182 if (!IsGdsHardcoded) { 6183 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6184 } 6185 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6186 } 6187 6188 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6189 OptionalImmIndexMap OptionalIdx; 6190 6191 unsigned OperandIdx[4]; 6192 unsigned EnMask = 0; 6193 int SrcIdx = 0; 6194 6195 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6196 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6197 6198 // Add the register arguments 6199 if (Op.isReg()) { 6200 assert(SrcIdx < 4); 6201 OperandIdx[SrcIdx] = Inst.size(); 6202 Op.addRegOperands(Inst, 1); 6203 ++SrcIdx; 6204 continue; 6205 } 6206 6207 if (Op.isOff()) { 6208 assert(SrcIdx < 4); 6209 OperandIdx[SrcIdx] = Inst.size(); 6210 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6211 ++SrcIdx; 6212 continue; 6213 } 6214 6215 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6216 Op.addImmOperands(Inst, 1); 6217 continue; 6218 } 6219 6220 if (Op.isToken() && Op.getToken() == "done") 6221 continue; 6222 6223 // Handle optional arguments 6224 OptionalIdx[Op.getImmTy()] = i; 6225 } 6226 6227 assert(SrcIdx == 4); 6228 6229 bool Compr = false; 6230 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6231 Compr = true; 6232 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6233 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6234 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6235 } 6236 6237 for (auto i = 0; i < SrcIdx; ++i) { 6238 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6239 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6240 } 6241 } 6242 6243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6245 6246 Inst.addOperand(MCOperand::createImm(EnMask)); 6247 } 6248 6249 //===----------------------------------------------------------------------===// 6250 // s_waitcnt 6251 //===----------------------------------------------------------------------===// 6252 6253 static bool 6254 encodeCnt( 6255 const AMDGPU::IsaVersion ISA, 6256 int64_t &IntVal, 6257 int64_t CntVal, 6258 bool Saturate, 6259 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6260 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6261 { 6262 bool Failed = false; 6263 6264 IntVal = encode(ISA, IntVal, CntVal); 6265 if (CntVal != decode(ISA, IntVal)) { 6266 if (Saturate) { 6267 IntVal = encode(ISA, IntVal, -1); 6268 } else { 6269 Failed = true; 6270 } 6271 } 6272 return Failed; 6273 } 6274 6275 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6276 6277 SMLoc CntLoc = getLoc(); 6278 StringRef CntName = getTokenStr(); 6279 6280 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6281 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6282 return false; 6283 6284 int64_t CntVal; 6285 SMLoc ValLoc = getLoc(); 6286 if (!parseExpr(CntVal)) 6287 return false; 6288 6289 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6290 6291 bool Failed = true; 6292 bool Sat = CntName.endswith("_sat"); 6293 6294 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6295 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6296 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6297 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6298 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6299 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6300 } else { 6301 Error(CntLoc, "invalid counter name " + CntName); 6302 return false; 6303 } 6304 6305 if (Failed) { 6306 Error(ValLoc, "too large value for " + CntName); 6307 return false; 6308 } 6309 6310 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6311 return false; 6312 6313 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6314 if (isToken(AsmToken::EndOfStatement)) { 6315 Error(getLoc(), "expected a counter name"); 6316 return false; 6317 } 6318 } 6319 6320 return true; 6321 } 6322 6323 OperandMatchResultTy 6324 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6325 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6326 int64_t Waitcnt = getWaitcntBitMask(ISA); 6327 SMLoc S = getLoc(); 6328 6329 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6330 while (!isToken(AsmToken::EndOfStatement)) { 6331 if (!parseCnt(Waitcnt)) 6332 return MatchOperand_ParseFail; 6333 } 6334 } else { 6335 if (!parseExpr(Waitcnt)) 6336 return MatchOperand_ParseFail; 6337 } 6338 6339 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6340 return MatchOperand_Success; 6341 } 6342 6343 bool 6344 AMDGPUOperand::isSWaitCnt() const { 6345 return isImm(); 6346 } 6347 6348 //===----------------------------------------------------------------------===// 6349 // DepCtr 6350 //===----------------------------------------------------------------------===// 6351 6352 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6353 StringRef DepCtrName) { 6354 switch 
(ErrorId) { 6355 case OPR_ID_UNKNOWN: 6356 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6357 return; 6358 case OPR_ID_UNSUPPORTED: 6359 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6360 return; 6361 case OPR_ID_DUPLICATE: 6362 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6363 return; 6364 case OPR_VAL_INVALID: 6365 Error(Loc, Twine("invalid value for ", DepCtrName)); 6366 return; 6367 default: 6368 assert(false); 6369 } 6370 } 6371 6372 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6373 6374 using namespace llvm::AMDGPU::DepCtr; 6375 6376 SMLoc DepCtrLoc = getLoc(); 6377 StringRef DepCtrName = getTokenStr(); 6378 6379 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6380 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6381 return false; 6382 6383 int64_t ExprVal; 6384 if (!parseExpr(ExprVal)) 6385 return false; 6386 6387 unsigned PrevOprMask = UsedOprMask; 6388 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6389 6390 if (CntVal < 0) { 6391 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6392 return false; 6393 } 6394 6395 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6396 return false; 6397 6398 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6399 if (isToken(AsmToken::EndOfStatement)) { 6400 Error(getLoc(), "expected a counter name"); 6401 return false; 6402 } 6403 } 6404 6405 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6406 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6407 return true; 6408 } 6409 6410 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6411 using namespace llvm::AMDGPU::DepCtr; 6412 6413 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6414 SMLoc Loc = getLoc(); 6415 6416 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6417 unsigned UsedOprMask = 0; 6418 while (!isToken(AsmToken::EndOfStatement)) { 6419 if (!parseDepCtr(DepCtr, UsedOprMask)) 6420 return MatchOperand_ParseFail; 6421 } 6422 } else { 6423 if (!parseExpr(DepCtr)) 6424 return MatchOperand_ParseFail; 6425 } 6426 6427 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6428 return MatchOperand_Success; 6429 } 6430 6431 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6432 6433 //===----------------------------------------------------------------------===// 6434 // hwreg 6435 //===----------------------------------------------------------------------===// 6436 6437 bool 6438 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6439 OperandInfoTy &Offset, 6440 OperandInfoTy &Width) { 6441 using namespace llvm::AMDGPU::Hwreg; 6442 6443 // The register may be specified by name or using a numeric code 6444 HwReg.Loc = getLoc(); 6445 if (isToken(AsmToken::Identifier) && 6446 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6447 HwReg.IsSymbolic = true; 6448 lex(); // skip register name 6449 } else if (!parseExpr(HwReg.Id, "a register name")) { 6450 return false; 6451 } 6452 6453 if (trySkipToken(AsmToken::RParen)) 6454 return true; 6455 6456 // parse optional params 6457 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6458 return false; 6459 6460 Offset.Loc = getLoc(); 6461 if (!parseExpr(Offset.Id)) 6462 return false; 6463 6464 if (!skipToken(AsmToken::Comma, "expected a comma")) 6465 return false; 6466 6467 Width.Loc = getLoc(); 6468 return parseExpr(Width.Id) && 6469 skipToken(AsmToken::RParen, "expected a closing 
parenthesis"); 6470 } 6471 6472 bool 6473 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6474 const OperandInfoTy &Offset, 6475 const OperandInfoTy &Width) { 6476 6477 using namespace llvm::AMDGPU::Hwreg; 6478 6479 if (HwReg.IsSymbolic) { 6480 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6481 Error(HwReg.Loc, 6482 "specified hardware register is not supported on this GPU"); 6483 return false; 6484 } 6485 } else { 6486 if (!isValidHwreg(HwReg.Id)) { 6487 Error(HwReg.Loc, 6488 "invalid code of hardware register: only 6-bit values are legal"); 6489 return false; 6490 } 6491 } 6492 if (!isValidHwregOffset(Offset.Id)) { 6493 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6494 return false; 6495 } 6496 if (!isValidHwregWidth(Width.Id)) { 6497 Error(Width.Loc, 6498 "invalid bitfield width: only values from 1 to 32 are legal"); 6499 return false; 6500 } 6501 return true; 6502 } 6503 6504 OperandMatchResultTy 6505 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6506 using namespace llvm::AMDGPU::Hwreg; 6507 6508 int64_t ImmVal = 0; 6509 SMLoc Loc = getLoc(); 6510 6511 if (trySkipId("hwreg", AsmToken::LParen)) { 6512 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6513 OperandInfoTy Offset(OFFSET_DEFAULT_); 6514 OperandInfoTy Width(WIDTH_DEFAULT_); 6515 if (parseHwregBody(HwReg, Offset, Width) && 6516 validateHwreg(HwReg, Offset, Width)) { 6517 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6518 } else { 6519 return MatchOperand_ParseFail; 6520 } 6521 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6522 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6523 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6524 return MatchOperand_ParseFail; 6525 } 6526 } else { 6527 return MatchOperand_ParseFail; 6528 } 6529 6530 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6531 return MatchOperand_Success; 6532 } 6533 6534 bool AMDGPUOperand::isHwreg() const { 6535 return isImmTy(ImmTyHwreg); 6536 } 6537 6538 //===----------------------------------------------------------------------===// 6539 // sendmsg 6540 //===----------------------------------------------------------------------===// 6541 6542 bool 6543 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6544 OperandInfoTy &Op, 6545 OperandInfoTy &Stream) { 6546 using namespace llvm::AMDGPU::SendMsg; 6547 6548 Msg.Loc = getLoc(); 6549 if (isToken(AsmToken::Identifier) && 6550 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6551 Msg.IsSymbolic = true; 6552 lex(); // skip message name 6553 } else if (!parseExpr(Msg.Id, "a message name")) { 6554 return false; 6555 } 6556 6557 if (trySkipToken(AsmToken::Comma)) { 6558 Op.IsDefined = true; 6559 Op.Loc = getLoc(); 6560 if (isToken(AsmToken::Identifier) && 6561 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6562 lex(); // skip operation name 6563 } else if (!parseExpr(Op.Id, "an operation name")) { 6564 return false; 6565 } 6566 6567 if (trySkipToken(AsmToken::Comma)) { 6568 Stream.IsDefined = true; 6569 Stream.Loc = getLoc(); 6570 if (!parseExpr(Stream.Id)) 6571 return false; 6572 } 6573 } 6574 6575 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6576 } 6577 6578 bool 6579 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6580 const OperandInfoTy &Op, 6581 const OperandInfoTy &Stream) { 6582 using namespace llvm::AMDGPU::SendMsg; 6583 6584 // Validation strictness depends on whether message is specified 6585 // in a symbolic or in a numeric form. 
In the latter case 6586 // only encoding possibility is checked. 6587 bool Strict = Msg.IsSymbolic; 6588 6589 if (Strict) { 6590 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6591 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6592 return false; 6593 } 6594 } else { 6595 if (!isValidMsgId(Msg.Id)) { 6596 Error(Msg.Loc, "invalid message id"); 6597 return false; 6598 } 6599 } 6600 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6601 if (Op.IsDefined) { 6602 Error(Op.Loc, "message does not support operations"); 6603 } else { 6604 Error(Msg.Loc, "missing message operation"); 6605 } 6606 return false; 6607 } 6608 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6609 Error(Op.Loc, "invalid operation id"); 6610 return false; 6611 } 6612 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6613 Error(Stream.Loc, "message operation does not support streams"); 6614 return false; 6615 } 6616 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6617 Error(Stream.Loc, "invalid message stream id"); 6618 return false; 6619 } 6620 return true; 6621 } 6622 6623 OperandMatchResultTy 6624 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6625 using namespace llvm::AMDGPU::SendMsg; 6626 6627 int64_t ImmVal = 0; 6628 SMLoc Loc = getLoc(); 6629 6630 if (trySkipId("sendmsg", AsmToken::LParen)) { 6631 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6632 OperandInfoTy Op(OP_NONE_); 6633 OperandInfoTy Stream(STREAM_ID_NONE_); 6634 if (parseSendMsgBody(Msg, Op, Stream) && 6635 validateSendMsg(Msg, Op, Stream)) { 6636 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6637 } else { 6638 return MatchOperand_ParseFail; 6639 } 6640 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6641 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6642 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6643 return MatchOperand_ParseFail; 6644 } 6645 } else { 6646 return MatchOperand_ParseFail; 6647 } 6648 6649 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6650 return MatchOperand_Success; 6651 } 6652 6653 bool AMDGPUOperand::isSendMsg() const { 6654 return isImmTy(ImmTySendMsg); 6655 } 6656 6657 //===----------------------------------------------------------------------===// 6658 // v_interp 6659 //===----------------------------------------------------------------------===// 6660 6661 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6662 StringRef Str; 6663 SMLoc S = getLoc(); 6664 6665 if (!parseId(Str)) 6666 return MatchOperand_NoMatch; 6667 6668 int Slot = StringSwitch<int>(Str) 6669 .Case("p10", 0) 6670 .Case("p20", 1) 6671 .Case("p0", 2) 6672 .Default(-1); 6673 6674 if (Slot == -1) { 6675 Error(S, "invalid interpolation slot"); 6676 return MatchOperand_ParseFail; 6677 } 6678 6679 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6680 AMDGPUOperand::ImmTyInterpSlot)); 6681 return MatchOperand_Success; 6682 } 6683 6684 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6685 StringRef Str; 6686 SMLoc S = getLoc(); 6687 6688 if (!parseId(Str)) 6689 return MatchOperand_NoMatch; 6690 6691 if (!Str.startswith("attr")) { 6692 Error(S, "invalid interpolation attribute"); 6693 return MatchOperand_ParseFail; 6694 } 6695 6696 StringRef Chan = Str.take_back(2); 6697 int AttrChan = StringSwitch<int>(Chan) 6698 .Case(".x", 0) 6699 .Case(".y", 1) 6700 .Case(".z", 2) 6701 .Case(".w", 3) 6702 .Default(-1); 6703 if (AttrChan == -1) { 6704 Error(S, "invalid or missing 
interpolation attribute channel"); 6705 return MatchOperand_ParseFail; 6706 } 6707 6708 Str = Str.drop_back(2).drop_front(4); 6709 6710 uint8_t Attr; 6711 if (Str.getAsInteger(10, Attr)) { 6712 Error(S, "invalid or missing interpolation attribute number"); 6713 return MatchOperand_ParseFail; 6714 } 6715 6716 if (Attr > 63) { 6717 Error(S, "out of bounds interpolation attribute number"); 6718 return MatchOperand_ParseFail; 6719 } 6720 6721 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6722 6723 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6724 AMDGPUOperand::ImmTyInterpAttr)); 6725 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6726 AMDGPUOperand::ImmTyAttrChan)); 6727 return MatchOperand_Success; 6728 } 6729 6730 //===----------------------------------------------------------------------===// 6731 // exp 6732 //===----------------------------------------------------------------------===// 6733 6734 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6735 using namespace llvm::AMDGPU::Exp; 6736 6737 StringRef Str; 6738 SMLoc S = getLoc(); 6739 6740 if (!parseId(Str)) 6741 return MatchOperand_NoMatch; 6742 6743 unsigned Id = getTgtId(Str); 6744 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6745 Error(S, (Id == ET_INVALID) ? 6746 "invalid exp target" : 6747 "exp target is not supported on this GPU"); 6748 return MatchOperand_ParseFail; 6749 } 6750 6751 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6752 AMDGPUOperand::ImmTyExpTgt)); 6753 return MatchOperand_Success; 6754 } 6755 6756 //===----------------------------------------------------------------------===// 6757 // parser helpers 6758 //===----------------------------------------------------------------------===// 6759 6760 bool 6761 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6762 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6763 } 6764 6765 bool 6766 AMDGPUAsmParser::isId(const StringRef Id) const { 6767 return isId(getToken(), Id); 6768 } 6769 6770 bool 6771 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6772 return getTokenKind() == Kind; 6773 } 6774 6775 bool 6776 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6777 if (isId(Id)) { 6778 lex(); 6779 return true; 6780 } 6781 return false; 6782 } 6783 6784 bool 6785 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6786 if (isToken(AsmToken::Identifier)) { 6787 StringRef Tok = getTokenStr(); 6788 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6789 lex(); 6790 return true; 6791 } 6792 } 6793 return false; 6794 } 6795 6796 bool 6797 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6798 if (isId(Id) && peekToken().is(Kind)) { 6799 lex(); 6800 lex(); 6801 return true; 6802 } 6803 return false; 6804 } 6805 6806 bool 6807 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6808 if (isToken(Kind)) { 6809 lex(); 6810 return true; 6811 } 6812 return false; 6813 } 6814 6815 bool 6816 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6817 const StringRef ErrMsg) { 6818 if (!trySkipToken(Kind)) { 6819 Error(getLoc(), ErrMsg); 6820 return false; 6821 } 6822 return true; 6823 } 6824 6825 bool 6826 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6827 SMLoc S = getLoc(); 6828 6829 const MCExpr *Expr; 6830 if (Parser.parseExpression(Expr)) 6831 return false; 6832 6833 if (Expr->evaluateAsAbsolute(Imm)) 6834 return true; 6835 6836 if 
(Expected.empty()) { 6837 Error(S, "expected absolute expression"); 6838 } else { 6839 Error(S, Twine("expected ", Expected) + 6840 Twine(" or an absolute expression")); 6841 } 6842 return false; 6843 } 6844 6845 bool 6846 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6847 SMLoc S = getLoc(); 6848 6849 const MCExpr *Expr; 6850 if (Parser.parseExpression(Expr)) 6851 return false; 6852 6853 int64_t IntVal; 6854 if (Expr->evaluateAsAbsolute(IntVal)) { 6855 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6856 } else { 6857 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6858 } 6859 return true; 6860 } 6861 6862 bool 6863 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6864 if (isToken(AsmToken::String)) { 6865 Val = getToken().getStringContents(); 6866 lex(); 6867 return true; 6868 } else { 6869 Error(getLoc(), ErrMsg); 6870 return false; 6871 } 6872 } 6873 6874 bool 6875 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6876 if (isToken(AsmToken::Identifier)) { 6877 Val = getTokenStr(); 6878 lex(); 6879 return true; 6880 } else { 6881 if (!ErrMsg.empty()) 6882 Error(getLoc(), ErrMsg); 6883 return false; 6884 } 6885 } 6886 6887 AsmToken 6888 AMDGPUAsmParser::getToken() const { 6889 return Parser.getTok(); 6890 } 6891 6892 AsmToken 6893 AMDGPUAsmParser::peekToken() { 6894 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6895 } 6896 6897 void 6898 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6899 auto TokCount = getLexer().peekTokens(Tokens); 6900 6901 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6902 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6903 } 6904 6905 AsmToken::TokenKind 6906 AMDGPUAsmParser::getTokenKind() const { 6907 return getLexer().getKind(); 6908 } 6909 6910 SMLoc 6911 AMDGPUAsmParser::getLoc() const { 6912 return getToken().getLoc(); 6913 } 6914 6915 StringRef 6916 AMDGPUAsmParser::getTokenStr() const { 6917 return getToken().getString(); 6918 } 6919 6920 void 6921 AMDGPUAsmParser::lex() { 6922 Parser.Lex(); 6923 } 6924 6925 SMLoc 6926 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6927 const OperandVector &Operands) const { 6928 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6929 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6930 if (Test(Op)) 6931 return Op.getStartLoc(); 6932 } 6933 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6934 } 6935 6936 SMLoc 6937 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6938 const OperandVector &Operands) const { 6939 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6940 return getOperandLoc(Test, Operands); 6941 } 6942 6943 SMLoc 6944 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6945 const OperandVector &Operands) const { 6946 auto Test = [=](const AMDGPUOperand& Op) { 6947 return Op.isRegKind() && Op.getReg() == Reg; 6948 }; 6949 return getOperandLoc(Test, Operands); 6950 } 6951 6952 SMLoc 6953 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6954 auto Test = [](const AMDGPUOperand& Op) { 6955 return Op.IsImmKindLiteral() || Op.isExpr(); 6956 }; 6957 return getOperandLoc(Test, Operands); 6958 } 6959 6960 SMLoc 6961 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6962 auto Test = [](const AMDGPUOperand& Op) { 6963 return Op.isImmKindConst(); 6964 }; 6965 return getOperandLoc(Test, Operands); 6966 } 6967 6968 //===----------------------------------------------------------------------===// 6969 // 
swizzle 6970 //===----------------------------------------------------------------------===// 6971 6972 LLVM_READNONE 6973 static unsigned 6974 encodeBitmaskPerm(const unsigned AndMask, 6975 const unsigned OrMask, 6976 const unsigned XorMask) { 6977 using namespace llvm::AMDGPU::Swizzle; 6978 6979 return BITMASK_PERM_ENC | 6980 (AndMask << BITMASK_AND_SHIFT) | 6981 (OrMask << BITMASK_OR_SHIFT) | 6982 (XorMask << BITMASK_XOR_SHIFT); 6983 } 6984 6985 bool 6986 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6987 const unsigned MinVal, 6988 const unsigned MaxVal, 6989 const StringRef ErrMsg, 6990 SMLoc &Loc) { 6991 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6992 return false; 6993 } 6994 Loc = getLoc(); 6995 if (!parseExpr(Op)) { 6996 return false; 6997 } 6998 if (Op < MinVal || Op > MaxVal) { 6999 Error(Loc, ErrMsg); 7000 return false; 7001 } 7002 7003 return true; 7004 } 7005 7006 bool 7007 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7008 const unsigned MinVal, 7009 const unsigned MaxVal, 7010 const StringRef ErrMsg) { 7011 SMLoc Loc; 7012 for (unsigned i = 0; i < OpNum; ++i) { 7013 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7014 return false; 7015 } 7016 7017 return true; 7018 } 7019 7020 bool 7021 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7022 using namespace llvm::AMDGPU::Swizzle; 7023 7024 int64_t Lane[LANE_NUM]; 7025 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7026 "expected a 2-bit lane id")) { 7027 Imm = QUAD_PERM_ENC; 7028 for (unsigned I = 0; I < LANE_NUM; ++I) { 7029 Imm |= Lane[I] << (LANE_SHIFT * I); 7030 } 7031 return true; 7032 } 7033 return false; 7034 } 7035 7036 bool 7037 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7038 using namespace llvm::AMDGPU::Swizzle; 7039 7040 SMLoc Loc; 7041 int64_t GroupSize; 7042 int64_t LaneIdx; 7043 7044 if (!parseSwizzleOperand(GroupSize, 7045 2, 32, 7046 "group size must be in the interval [2,32]", 7047 Loc)) { 7048 return false; 7049 } 7050 if (!isPowerOf2_64(GroupSize)) { 7051 Error(Loc, "group size must be a power of two"); 7052 return false; 7053 } 7054 if (parseSwizzleOperand(LaneIdx, 7055 0, GroupSize - 1, 7056 "lane id must be in the interval [0,group size - 1]", 7057 Loc)) { 7058 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7059 return true; 7060 } 7061 return false; 7062 } 7063 7064 bool 7065 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7066 using namespace llvm::AMDGPU::Swizzle; 7067 7068 SMLoc Loc; 7069 int64_t GroupSize; 7070 7071 if (!parseSwizzleOperand(GroupSize, 7072 2, 32, 7073 "group size must be in the interval [2,32]", 7074 Loc)) { 7075 return false; 7076 } 7077 if (!isPowerOf2_64(GroupSize)) { 7078 Error(Loc, "group size must be a power of two"); 7079 return false; 7080 } 7081 7082 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7083 return true; 7084 } 7085 7086 bool 7087 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7088 using namespace llvm::AMDGPU::Swizzle; 7089 7090 SMLoc Loc; 7091 int64_t GroupSize; 7092 7093 if (!parseSwizzleOperand(GroupSize, 7094 1, 16, 7095 "group size must be in the interval [1,16]", 7096 Loc)) { 7097 return false; 7098 } 7099 if (!isPowerOf2_64(GroupSize)) { 7100 Error(Loc, "group size must be a power of two"); 7101 return false; 7102 } 7103 7104 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7105 return true; 7106 } 7107 7108 bool 7109 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7110 using namespace llvm::AMDGPU::Swizzle; 7111 7112 if 
(!skipToken(AsmToken::Comma, "expected a comma")) { 7113 return false; 7114 } 7115 7116 StringRef Ctl; 7117 SMLoc StrLoc = getLoc(); 7118 if (!parseString(Ctl)) { 7119 return false; 7120 } 7121 if (Ctl.size() != BITMASK_WIDTH) { 7122 Error(StrLoc, "expected a 5-character mask"); 7123 return false; 7124 } 7125 7126 unsigned AndMask = 0; 7127 unsigned OrMask = 0; 7128 unsigned XorMask = 0; 7129 7130 for (size_t i = 0; i < Ctl.size(); ++i) { 7131 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7132 switch(Ctl[i]) { 7133 default: 7134 Error(StrLoc, "invalid mask"); 7135 return false; 7136 case '0': 7137 break; 7138 case '1': 7139 OrMask |= Mask; 7140 break; 7141 case 'p': 7142 AndMask |= Mask; 7143 break; 7144 case 'i': 7145 AndMask |= Mask; 7146 XorMask |= Mask; 7147 break; 7148 } 7149 } 7150 7151 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7152 return true; 7153 } 7154 7155 bool 7156 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7157 7158 SMLoc OffsetLoc = getLoc(); 7159 7160 if (!parseExpr(Imm, "a swizzle macro")) { 7161 return false; 7162 } 7163 if (!isUInt<16>(Imm)) { 7164 Error(OffsetLoc, "expected a 16-bit offset"); 7165 return false; 7166 } 7167 return true; 7168 } 7169 7170 bool 7171 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7172 using namespace llvm::AMDGPU::Swizzle; 7173 7174 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7175 7176 SMLoc ModeLoc = getLoc(); 7177 bool Ok = false; 7178 7179 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7180 Ok = parseSwizzleQuadPerm(Imm); 7181 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7182 Ok = parseSwizzleBitmaskPerm(Imm); 7183 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7184 Ok = parseSwizzleBroadcast(Imm); 7185 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7186 Ok = parseSwizzleSwap(Imm); 7187 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7188 Ok = parseSwizzleReverse(Imm); 7189 } else { 7190 Error(ModeLoc, "expected a swizzle mode"); 7191 } 7192 7193 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7194 } 7195 7196 return false; 7197 } 7198 7199 OperandMatchResultTy 7200 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7201 SMLoc S = getLoc(); 7202 int64_t Imm = 0; 7203 7204 if (trySkipId("offset")) { 7205 7206 bool Ok = false; 7207 if (skipToken(AsmToken::Colon, "expected a colon")) { 7208 if (trySkipId("swizzle")) { 7209 Ok = parseSwizzleMacro(Imm); 7210 } else { 7211 Ok = parseSwizzleOffset(Imm); 7212 } 7213 } 7214 7215 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7216 7217 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7218 } else { 7219 // Swizzle "offset" operand is optional. 7220 // If it is omitted, try parsing other optional operands.
7221 return parseOptionalOpr(Operands); 7222 } 7223 } 7224 7225 bool 7226 AMDGPUOperand::isSwizzle() const { 7227 return isImmTy(ImmTySwizzle); 7228 } 7229 7230 //===----------------------------------------------------------------------===// 7231 // VGPR Index Mode 7232 //===----------------------------------------------------------------------===// 7233 7234 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7235 7236 using namespace llvm::AMDGPU::VGPRIndexMode; 7237 7238 if (trySkipToken(AsmToken::RParen)) { 7239 return OFF; 7240 } 7241 7242 int64_t Imm = 0; 7243 7244 while (true) { 7245 unsigned Mode = 0; 7246 SMLoc S = getLoc(); 7247 7248 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7249 if (trySkipId(IdSymbolic[ModeId])) { 7250 Mode = 1 << ModeId; 7251 break; 7252 } 7253 } 7254 7255 if (Mode == 0) { 7256 Error(S, (Imm == 0)? 7257 "expected a VGPR index mode or a closing parenthesis" : 7258 "expected a VGPR index mode"); 7259 return UNDEF; 7260 } 7261 7262 if (Imm & Mode) { 7263 Error(S, "duplicate VGPR index mode"); 7264 return UNDEF; 7265 } 7266 Imm |= Mode; 7267 7268 if (trySkipToken(AsmToken::RParen)) 7269 break; 7270 if (!skipToken(AsmToken::Comma, 7271 "expected a comma or a closing parenthesis")) 7272 return UNDEF; 7273 } 7274 7275 return Imm; 7276 } 7277 7278 OperandMatchResultTy 7279 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7280 7281 using namespace llvm::AMDGPU::VGPRIndexMode; 7282 7283 int64_t Imm = 0; 7284 SMLoc S = getLoc(); 7285 7286 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7287 Imm = parseGPRIdxMacro(); 7288 if (Imm == UNDEF) 7289 return MatchOperand_ParseFail; 7290 } else { 7291 if (getParser().parseAbsoluteExpression(Imm)) 7292 return MatchOperand_ParseFail; 7293 if (Imm < 0 || !isUInt<4>(Imm)) { 7294 Error(S, "invalid immediate: only 4-bit values are legal"); 7295 return MatchOperand_ParseFail; 7296 } 7297 } 7298 7299 Operands.push_back( 7300 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7301 return MatchOperand_Success; 7302 } 7303 7304 bool AMDGPUOperand::isGPRIdxMode() const { 7305 return isImmTy(ImmTyGprIdxMode); 7306 } 7307 7308 //===----------------------------------------------------------------------===// 7309 // sopp branch targets 7310 //===----------------------------------------------------------------------===// 7311 7312 OperandMatchResultTy 7313 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7314 7315 // Make sure we are not parsing something 7316 // that looks like a label or an expression but is not. 7317 // This will improve error messages. 7318 if (isRegister() || isModifier()) 7319 return MatchOperand_NoMatch; 7320 7321 if (!parseExpr(Operands)) 7322 return MatchOperand_ParseFail; 7323 7324 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7325 assert(Opr.isImm() || Opr.isExpr()); 7326 SMLoc Loc = Opr.getStartLoc(); 7327 7328 // Currently we do not support arbitrary expressions as branch targets. 7329 // Only labels and absolute expressions are accepted. 
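  // E.g. "s_branch loop_end" (a label) or "s_branch 4" (an absolute value) are
  // accepted, while a relocatable expression such as "loop_end+4" is diagnosed
  // as an error below.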
7330 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7331 Error(Loc, "expected an absolute expression or a label"); 7332 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7333 Error(Loc, "expected a 16-bit signed jump offset"); 7334 } 7335 7336 return MatchOperand_Success; 7337 } 7338 7339 //===----------------------------------------------------------------------===// 7340 // Boolean holding registers 7341 //===----------------------------------------------------------------------===// 7342 7343 OperandMatchResultTy 7344 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7345 return parseReg(Operands); 7346 } 7347 7348 //===----------------------------------------------------------------------===// 7349 // mubuf 7350 //===----------------------------------------------------------------------===// 7351 7352 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7353 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7354 } 7355 7356 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7357 const OperandVector &Operands, 7358 bool IsAtomic, 7359 bool IsLds) { 7360 bool IsLdsOpcode = IsLds; 7361 bool HasLdsModifier = false; 7362 OptionalImmIndexMap OptionalIdx; 7363 unsigned FirstOperandIdx = 1; 7364 bool IsAtomicReturn = false; 7365 7366 if (IsAtomic) { 7367 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7368 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7369 if (!Op.isCPol()) 7370 continue; 7371 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7372 break; 7373 } 7374 7375 if (!IsAtomicReturn) { 7376 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7377 if (NewOpc != -1) 7378 Inst.setOpcode(NewOpc); 7379 } 7380 7381 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7382 SIInstrFlags::IsAtomicRet; 7383 } 7384 7385 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7386 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7387 7388 // Add the register arguments 7389 if (Op.isReg()) { 7390 Op.addRegOperands(Inst, 1); 7391 // Insert a tied src for atomic return dst. 7392 // This cannot be postponed as subsequent calls to 7393 // addImmOperands rely on correct number of MC operands. 7394 if (IsAtomicReturn && i == FirstOperandIdx) 7395 Op.addRegOperands(Inst, 1); 7396 continue; 7397 } 7398 7399 // Handle the case where soffset is an immediate 7400 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7401 Op.addImmOperands(Inst, 1); 7402 continue; 7403 } 7404 7405 HasLdsModifier |= Op.isLDS(); 7406 7407 // Handle tokens like 'offen' which are sometimes hard-coded into the 7408 // asm string. There are no MCInst operands for these. 7409 if (Op.isToken()) { 7410 continue; 7411 } 7412 assert(Op.isImm()); 7413 7414 // Handle optional arguments 7415 OptionalIdx[Op.getImmTy()] = i; 7416 } 7417 7418 // This is a workaround for an llvm quirk which may result in an 7419 // incorrect instruction selection. Lds and non-lds versions of 7420 // MUBUF instructions are identical except that lds versions 7421 // have mandatory 'lds' modifier. However this modifier follows 7422 // optional modifiers and llvm asm matcher regards this 'lds' 7423 // modifier as an optional one. As a result, an lds version 7424 // of opcode may be selected even if it has no 'lds' modifier. 7425 if (IsLdsOpcode && !HasLdsModifier) { 7426 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7427 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7428 Inst.setOpcode(NoLdsOpcode); 7429 IsLdsOpcode = false; 7430 } 7431 } 7432 7433 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7434 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7435 7436 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7437 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7438 } 7439 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7440 } 7441 7442 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7443 OptionalImmIndexMap OptionalIdx; 7444 7445 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7446 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7447 7448 // Add the register arguments 7449 if (Op.isReg()) { 7450 Op.addRegOperands(Inst, 1); 7451 continue; 7452 } 7453 7454 // Handle the case where soffset is an immediate 7455 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7456 Op.addImmOperands(Inst, 1); 7457 continue; 7458 } 7459 7460 // Handle tokens like 'offen' which are sometimes hard-coded into the 7461 // asm string. There are no MCInst operands for these. 7462 if (Op.isToken()) { 7463 continue; 7464 } 7465 assert(Op.isImm()); 7466 7467 // Handle optional arguments 7468 OptionalIdx[Op.getImmTy()] = i; 7469 } 7470 7471 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7472 AMDGPUOperand::ImmTyOffset); 7473 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7474 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7475 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7476 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7477 } 7478 7479 //===----------------------------------------------------------------------===// 7480 // mimg 7481 //===----------------------------------------------------------------------===// 7482 7483 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7484 bool IsAtomic) { 7485 unsigned I = 1; 7486 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7487 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7488 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7489 } 7490 7491 if (IsAtomic) { 7492 // Add src, same as dst 7493 assert(Desc.getNumDefs() == 1); 7494 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7495 } 7496 7497 OptionalImmIndexMap OptionalIdx; 7498 7499 for (unsigned E = Operands.size(); I != E; ++I) { 7500 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7501 7502 // Add the register arguments 7503 if (Op.isReg()) { 7504 Op.addRegOperands(Inst, 1); 7505 } else if (Op.isImmModifier()) { 7506 OptionalIdx[Op.getImmTy()] = I; 7507 } else if (!Op.isToken()) { 7508 llvm_unreachable("unexpected operand type"); 7509 } 7510 } 7511 7512 bool IsGFX10Plus = isGFX10Plus(); 7513 7514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7515 if (IsGFX10Plus) 7516 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7517 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7519 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7520 if (IsGFX10Plus) 7521 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7522 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7523 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7524 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7525 if (!IsGFX10Plus) 7526 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7527 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7528 } 7529 7530 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7531 cvtMIMG(Inst, Operands, true); 7532 } 7533 7534 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7535 OptionalImmIndexMap OptionalIdx; 7536 bool IsAtomicReturn = false; 7537 7538 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7539 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7540 if (!Op.isCPol()) 7541 continue; 7542 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7543 break; 7544 } 7545 7546 if (!IsAtomicReturn) { 7547 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7548 if (NewOpc != -1) 7549 Inst.setOpcode(NewOpc); 7550 } 7551 7552 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7553 SIInstrFlags::IsAtomicRet; 7554 7555 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7556 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7557 7558 // Add the register arguments 7559 if (Op.isReg()) { 7560 Op.addRegOperands(Inst, 1); 7561 if (IsAtomicReturn && i == 1) 7562 Op.addRegOperands(Inst, 1); 7563 continue; 7564 } 7565 7566 // Handle the case where soffset is an immediate 7567 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7568 Op.addImmOperands(Inst, 1); 7569 continue; 7570 } 7571 7572 // Handle tokens like 'offen' which are sometimes hard-coded into the 7573 // asm string. There are no MCInst operands for these. 7574 if (Op.isToken()) { 7575 continue; 7576 } 7577 assert(Op.isImm()); 7578 7579 // Handle optional arguments 7580 OptionalIdx[Op.getImmTy()] = i; 7581 } 7582 7583 if ((int)Inst.getNumOperands() <= 7584 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7586 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7587 } 7588 7589 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7590 const OperandVector &Operands) { 7591 for (unsigned I = 1; I < Operands.size(); ++I) { 7592 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7593 if (Operand.isReg()) 7594 Operand.addRegOperands(Inst, 1); 7595 } 7596 7597 Inst.addOperand(MCOperand::createImm(1)); // a16 7598 } 7599 7600 //===----------------------------------------------------------------------===// 7601 // smrd 7602 //===----------------------------------------------------------------------===// 7603 7604 bool AMDGPUOperand::isSMRDOffset8() const { 7605 return isImm() && isUInt<8>(getImm()); 7606 } 7607 7608 bool AMDGPUOperand::isSMEMOffset() const { 7609 return isImm(); // Offset range is checked later by validator. 7610 } 7611 7612 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7613 // 32-bit literals are only supported on CI and we only want to use them 7614 // when the offset is > 8-bits. 
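  // E.g. on CI "s_load_dword s0, s[2:3], 0x100" needs the 32-bit literal form,
  // while an offset like 0x10 still fits the 8-bit encoding.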
7615 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7616 } 7617 7618 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7619 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7620 } 7621 7622 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7623 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7624 } 7625 7626 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7627 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7628 } 7629 7630 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7631 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7632 } 7633 7634 //===----------------------------------------------------------------------===// 7635 // vop3 7636 //===----------------------------------------------------------------------===// 7637 7638 static bool ConvertOmodMul(int64_t &Mul) { 7639 if (Mul != 1 && Mul != 2 && Mul != 4) 7640 return false; 7641 7642 Mul >>= 1; 7643 return true; 7644 } 7645 7646 static bool ConvertOmodDiv(int64_t &Div) { 7647 if (Div == 1) { 7648 Div = 0; 7649 return true; 7650 } 7651 7652 if (Div == 2) { 7653 Div = 3; 7654 return true; 7655 } 7656 7657 return false; 7658 } 7659 7660 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7661 // This is intentional and ensures compatibility with sp3. 7662 // See bug 35397 for details. 7663 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7664 if (BoundCtrl == 0 || BoundCtrl == 1) { 7665 BoundCtrl = 1; 7666 return true; 7667 } 7668 return false; 7669 } 7670 7671 // Note: the order in this table matches the order of operands in AsmString. 7672 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7673 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7674 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7675 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7676 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7677 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7678 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7679 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7680 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7681 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7682 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7683 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7684 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7685 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7686 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7687 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7688 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7689 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7690 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7691 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7692 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7693 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7694 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7695 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7696 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7697 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7698 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7699 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7700 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7701 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7702 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7703 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7704 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7705 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7706 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7707 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7708 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7709 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7710 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7711 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7712 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7713 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7714 }; 7715 7716 void AMDGPUAsmParser::onBeginOfFile() { 7717 if (!getParser().getStreamer().getTargetStreamer() || 7718 getSTI().getTargetTriple().getArch() == Triple::r600) 7719 return; 7720 7721 if (!getTargetStreamer().getTargetID()) 7722 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7723 7724 if (isHsaAbiVersion3AndAbove(&getSTI())) 7725 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7726 } 7727 7728 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7729 7730 OperandMatchResultTy res = parseOptionalOpr(Operands); 7731 7732 // This is a hack to enable hardcoded mandatory operands which follow 7733 // optional operands. 7734 // 7735 // Current design assumes that all operands after the first optional operand 7736 // are also optional. However implementation of some instructions violates 7737 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7738 // 7739 // To alleviate this problem, we have to (implicitly) parse extra operands 7740 // to make sure autogenerated parser of custom operands never hit hardcoded 7741 // mandatory operands. 
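  // E.g. the returning form of a flat atomic ("flat_atomic_add v0, v[1:2], v3 glc")
  // spells a mandatory "glc" after its optional operands, so the loop below
  // speculatively parses a few more optional operands (up to MAX_OPR_LOOKAHEAD).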
7742 7743 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7744 if (res != MatchOperand_Success || 7745 isToken(AsmToken::EndOfStatement)) 7746 break; 7747 7748 trySkipToken(AsmToken::Comma); 7749 res = parseOptionalOpr(Operands); 7750 } 7751 7752 return res; 7753 } 7754 7755 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7756 OperandMatchResultTy res; 7757 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7758 // try to parse any optional operand here 7759 if (Op.IsBit) { 7760 res = parseNamedBit(Op.Name, Operands, Op.Type); 7761 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7762 res = parseOModOperand(Operands); 7763 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7764 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7765 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7766 res = parseSDWASel(Operands, Op.Name, Op.Type); 7767 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7768 res = parseSDWADstUnused(Operands); 7769 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7770 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7771 Op.Type == AMDGPUOperand::ImmTyNegLo || 7772 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7773 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7774 Op.ConvertResult); 7775 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7776 res = parseDim(Operands); 7777 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7778 res = parseCPol(Operands); 7779 } else { 7780 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7781 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7782 res = parseOperandArrayWithPrefix("neg", Operands, 7783 AMDGPUOperand::ImmTyBLGP, 7784 nullptr); 7785 } 7786 } 7787 if (res != MatchOperand_NoMatch) { 7788 return res; 7789 } 7790 } 7791 return MatchOperand_NoMatch; 7792 } 7793 7794 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7795 StringRef Name = getTokenStr(); 7796 if (Name == "mul") { 7797 return parseIntWithPrefix("mul", Operands, 7798 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7799 } 7800 7801 if (Name == "div") { 7802 return parseIntWithPrefix("div", Operands, 7803 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7804 } 7805 7806 return MatchOperand_NoMatch; 7807 } 7808 7809 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7810 cvtVOP3P(Inst, Operands); 7811 7812 int Opc = Inst.getOpcode(); 7813 7814 int SrcNum; 7815 const int Ops[] = { AMDGPU::OpName::src0, 7816 AMDGPU::OpName::src1, 7817 AMDGPU::OpName::src2 }; 7818 for (SrcNum = 0; 7819 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7820 ++SrcNum); 7821 assert(SrcNum > 0); 7822 7823 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7824 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7825 7826 if ((OpSel & (1 << SrcNum)) != 0) { 7827 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7828 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7829 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7830 } 7831 } 7832 7833 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7834 // 1. This operand is input modifiers 7835 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7836 // 2. This is not last operand 7837 && Desc.NumOperands > (OpNum + 1) 7838 // 3. Next operand is register class 7839 && Desc.OpInfo[OpNum + 1].RegClass != -1 7840 // 4. 
Next register is not tied to any other operand 7841 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7842 } 7843 7844 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7845 { 7846 OptionalImmIndexMap OptionalIdx; 7847 unsigned Opc = Inst.getOpcode(); 7848 7849 unsigned I = 1; 7850 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7851 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7852 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7853 } 7854 7855 for (unsigned E = Operands.size(); I != E; ++I) { 7856 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7857 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7858 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7859 } else if (Op.isInterpSlot() || 7860 Op.isInterpAttr() || 7861 Op.isAttrChan()) { 7862 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7863 } else if (Op.isImmModifier()) { 7864 OptionalIdx[Op.getImmTy()] = I; 7865 } else { 7866 llvm_unreachable("unhandled operand type"); 7867 } 7868 } 7869 7870 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7872 } 7873 7874 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7876 } 7877 7878 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7879 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7880 } 7881 } 7882 7883 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7884 OptionalImmIndexMap &OptionalIdx) { 7885 unsigned Opc = Inst.getOpcode(); 7886 7887 unsigned I = 1; 7888 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7889 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7890 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7891 } 7892 7893 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7894 // This instruction has src modifiers 7895 for (unsigned E = Operands.size(); I != E; ++I) { 7896 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7897 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7898 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7899 } else if (Op.isImmModifier()) { 7900 OptionalIdx[Op.getImmTy()] = I; 7901 } else if (Op.isRegOrImm()) { 7902 Op.addRegOrImmOperands(Inst, 1); 7903 } else { 7904 llvm_unreachable("unhandled operand type"); 7905 } 7906 } 7907 } else { 7908 // No src modifiers 7909 for (unsigned E = Operands.size(); I != E; ++I) { 7910 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7911 if (Op.isMod()) { 7912 OptionalIdx[Op.getImmTy()] = I; 7913 } else { 7914 Op.addRegOrImmOperands(Inst, 1); 7915 } 7916 } 7917 } 7918 7919 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7920 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7921 } 7922 7923 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7924 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7925 } 7926 7927 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7928 // it has src2 register operand that is tied to dst operand 7929 // we don't allow modifiers for this operand in assembler so src2_modifiers 7930 // should be 0. 
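  // E.g. for "v_mac_f32 v0, v1, v2" only dst/src0/src1 come from the parsed
  // operands; a zero src2_modifiers immediate and a copy of the dst register
  // (as the tied src2) are inserted below.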
7931 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7932 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7933 Opc == AMDGPU::V_MAC_F32_e64_vi || 7934 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7935 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7936 Opc == AMDGPU::V_MAC_F16_e64_vi || 7937 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7938 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7939 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7940 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7941 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7942 auto it = Inst.begin(); 7943 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7944 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7945 ++it; 7946 // Copy the operand to ensure it's not invalidated when Inst grows. 7947 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7948 } 7949 } 7950 7951 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7952 OptionalImmIndexMap OptionalIdx; 7953 cvtVOP3(Inst, Operands, OptionalIdx); 7954 } 7955 7956 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7957 OptionalImmIndexMap &OptIdx) { 7958 const int Opc = Inst.getOpcode(); 7959 const MCInstrDesc &Desc = MII.get(Opc); 7960 7961 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7962 7963 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7964 assert(!IsPacked); 7965 Inst.addOperand(Inst.getOperand(0)); 7966 } 7967 7968 // FIXME: This is messy. Parse the modifiers as if it were a normal VOP3 7969 // instruction, and then figure out where to actually put the modifiers. 7970 7971 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7972 if (OpSelIdx != -1) { 7973 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7974 } 7975 7976 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7977 if (OpSelHiIdx != -1) { 7978 int DefaultVal = IsPacked ?
-1 : 0; 7979 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7980 DefaultVal); 7981 } 7982 7983 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7984 if (NegLoIdx != -1) { 7985 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7986 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7987 } 7988 7989 const int Ops[] = { AMDGPU::OpName::src0, 7990 AMDGPU::OpName::src1, 7991 AMDGPU::OpName::src2 }; 7992 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7993 AMDGPU::OpName::src1_modifiers, 7994 AMDGPU::OpName::src2_modifiers }; 7995 7996 unsigned OpSel = 0; 7997 unsigned OpSelHi = 0; 7998 unsigned NegLo = 0; 7999 unsigned NegHi = 0; 8000 8001 if (OpSelIdx != -1) 8002 OpSel = Inst.getOperand(OpSelIdx).getImm(); 8003 8004 if (OpSelHiIdx != -1) 8005 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 8006 8007 if (NegLoIdx != -1) { 8008 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8009 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8010 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8011 } 8012 8013 for (int J = 0; J < 3; ++J) { 8014 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8015 if (OpIdx == -1) 8016 break; 8017 8018 uint32_t ModVal = 0; 8019 8020 if ((OpSel & (1 << J)) != 0) 8021 ModVal |= SISrcMods::OP_SEL_0; 8022 8023 if ((OpSelHi & (1 << J)) != 0) 8024 ModVal |= SISrcMods::OP_SEL_1; 8025 8026 if ((NegLo & (1 << J)) != 0) 8027 ModVal |= SISrcMods::NEG; 8028 8029 if ((NegHi & (1 << J)) != 0) 8030 ModVal |= SISrcMods::NEG_HI; 8031 8032 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8033 8034 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8035 } 8036 } 8037 8038 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8039 OptionalImmIndexMap OptIdx; 8040 cvtVOP3(Inst, Operands, OptIdx); 8041 cvtVOP3P(Inst, Operands, OptIdx); 8042 } 8043 8044 //===----------------------------------------------------------------------===// 8045 // dpp 8046 //===----------------------------------------------------------------------===// 8047 8048 bool AMDGPUOperand::isDPP8() const { 8049 return isImmTy(ImmTyDPP8); 8050 } 8051 8052 bool AMDGPUOperand::isDPPCtrl() const { 8053 using namespace AMDGPU::DPP; 8054 8055 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8056 if (result) { 8057 int64_t Imm = getImm(); 8058 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8059 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8060 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8061 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8062 (Imm == DppCtrl::WAVE_SHL1) || 8063 (Imm == DppCtrl::WAVE_ROL1) || 8064 (Imm == DppCtrl::WAVE_SHR1) || 8065 (Imm == DppCtrl::WAVE_ROR1) || 8066 (Imm == DppCtrl::ROW_MIRROR) || 8067 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8068 (Imm == DppCtrl::BCAST15) || 8069 (Imm == DppCtrl::BCAST31) || 8070 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8071 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8072 } 8073 return false; 8074 } 8075 8076 //===----------------------------------------------------------------------===// 8077 // mAI 8078 //===----------------------------------------------------------------------===// 8079 8080 bool AMDGPUOperand::isBLGP() const { 8081 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8082 } 8083 8084 bool 
AMDGPUOperand::isCBSZ() const { 8085 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8086 } 8087 8088 bool AMDGPUOperand::isABID() const { 8089 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8090 } 8091 8092 bool AMDGPUOperand::isS16Imm() const { 8093 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8094 } 8095 8096 bool AMDGPUOperand::isU16Imm() const { 8097 return isImm() && isUInt<16>(getImm()); 8098 } 8099 8100 //===----------------------------------------------------------------------===// 8101 // dim 8102 //===----------------------------------------------------------------------===// 8103 8104 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8105 // We want to allow "dim:1D" etc., 8106 // but the initial 1 is tokenized as an integer. 8107 std::string Token; 8108 if (isToken(AsmToken::Integer)) { 8109 SMLoc Loc = getToken().getEndLoc(); 8110 Token = std::string(getTokenStr()); 8111 lex(); 8112 if (getLoc() != Loc) 8113 return false; 8114 } 8115 8116 StringRef Suffix; 8117 if (!parseId(Suffix)) 8118 return false; 8119 Token += Suffix; 8120 8121 StringRef DimId = Token; 8122 if (DimId.startswith("SQ_RSRC_IMG_")) 8123 DimId = DimId.drop_front(12); 8124 8125 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8126 if (!DimInfo) 8127 return false; 8128 8129 Encoding = DimInfo->Encoding; 8130 return true; 8131 } 8132 8133 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8134 if (!isGFX10Plus()) 8135 return MatchOperand_NoMatch; 8136 8137 SMLoc S = getLoc(); 8138 8139 if (!trySkipId("dim", AsmToken::Colon)) 8140 return MatchOperand_NoMatch; 8141 8142 unsigned Encoding; 8143 SMLoc Loc = getLoc(); 8144 if (!parseDimId(Encoding)) { 8145 Error(Loc, "invalid dim value"); 8146 return MatchOperand_ParseFail; 8147 } 8148 8149 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8150 AMDGPUOperand::ImmTyDim)); 8151 return MatchOperand_Success; 8152 } 8153 8154 //===----------------------------------------------------------------------===// 8155 // dpp 8156 //===----------------------------------------------------------------------===// 8157 8158 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8159 SMLoc S = getLoc(); 8160 8161 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8162 return MatchOperand_NoMatch; 8163 8164 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8165 8166 int64_t Sels[8]; 8167 8168 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8169 return MatchOperand_ParseFail; 8170 8171 for (size_t i = 0; i < 8; ++i) { 8172 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8173 return MatchOperand_ParseFail; 8174 8175 SMLoc Loc = getLoc(); 8176 if (getParser().parseAbsoluteExpression(Sels[i])) 8177 return MatchOperand_ParseFail; 8178 if (0 > Sels[i] || 7 < Sels[i]) { 8179 Error(Loc, "expected a 3-bit value"); 8180 return MatchOperand_ParseFail; 8181 } 8182 } 8183 8184 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8185 return MatchOperand_ParseFail; 8186 8187 unsigned DPP8 = 0; 8188 for (size_t i = 0; i < 8; ++i) 8189 DPP8 |= (Sels[i] << (i * 3)); 8190 8191 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8192 return MatchOperand_Success; 8193 } 8194 8195 bool 8196 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8197 const OperandVector &Operands) { 8198 if (Ctrl == "row_newbcast") 8199 return isGFX90A(); 8200 8201 if (Ctrl == "row_share" || 8202 Ctrl 
== "row_xmask") 8203 return isGFX10Plus(); 8204 8205 if (Ctrl == "wave_shl" || 8206 Ctrl == "wave_shr" || 8207 Ctrl == "wave_rol" || 8208 Ctrl == "wave_ror" || 8209 Ctrl == "row_bcast") 8210 return isVI() || isGFX9(); 8211 8212 return Ctrl == "row_mirror" || 8213 Ctrl == "row_half_mirror" || 8214 Ctrl == "quad_perm" || 8215 Ctrl == "row_shl" || 8216 Ctrl == "row_shr" || 8217 Ctrl == "row_ror"; 8218 } 8219 8220 int64_t 8221 AMDGPUAsmParser::parseDPPCtrlPerm() { 8222 // quad_perm:[%d,%d,%d,%d] 8223 8224 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8225 return -1; 8226 8227 int64_t Val = 0; 8228 for (int i = 0; i < 4; ++i) { 8229 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8230 return -1; 8231 8232 int64_t Temp; 8233 SMLoc Loc = getLoc(); 8234 if (getParser().parseAbsoluteExpression(Temp)) 8235 return -1; 8236 if (Temp < 0 || Temp > 3) { 8237 Error(Loc, "expected a 2-bit value"); 8238 return -1; 8239 } 8240 8241 Val += (Temp << i * 2); 8242 } 8243 8244 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8245 return -1; 8246 8247 return Val; 8248 } 8249 8250 int64_t 8251 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8252 using namespace AMDGPU::DPP; 8253 8254 // sel:%d 8255 8256 int64_t Val; 8257 SMLoc Loc = getLoc(); 8258 8259 if (getParser().parseAbsoluteExpression(Val)) 8260 return -1; 8261 8262 struct DppCtrlCheck { 8263 int64_t Ctrl; 8264 int Lo; 8265 int Hi; 8266 }; 8267 8268 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8269 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8270 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8271 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8272 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8273 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8274 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8275 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8276 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8277 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8278 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8279 .Default({-1, 0, 0}); 8280 8281 bool Valid; 8282 if (Check.Ctrl == -1) { 8283 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8284 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8285 } else { 8286 Valid = Check.Lo <= Val && Val <= Check.Hi; 8287 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8288 } 8289 8290 if (!Valid) { 8291 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8292 return -1; 8293 } 8294 8295 return Val; 8296 } 8297 8298 OperandMatchResultTy 8299 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8300 using namespace AMDGPU::DPP; 8301 8302 if (!isToken(AsmToken::Identifier) || 8303 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8304 return MatchOperand_NoMatch; 8305 8306 SMLoc S = getLoc(); 8307 int64_t Val = -1; 8308 StringRef Ctrl; 8309 8310 parseId(Ctrl); 8311 8312 if (Ctrl == "row_mirror") { 8313 Val = DppCtrl::ROW_MIRROR; 8314 } else if (Ctrl == "row_half_mirror") { 8315 Val = DppCtrl::ROW_HALF_MIRROR; 8316 } else { 8317 if (skipToken(AsmToken::Colon, "expected a colon")) { 8318 if (Ctrl == "quad_perm") { 8319 Val = parseDPPCtrlPerm(); 8320 } else { 8321 Val = parseDPPCtrlSel(Ctrl); 8322 } 8323 } 8324 } 8325 8326 if (Val == -1) 8327 return MatchOperand_ParseFail; 8328 8329 Operands.push_back( 8330 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8331 return MatchOperand_Success; 8332 } 8333 8334 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8335 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8336 } 8337 8338 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8339 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8340 } 8341 8342 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8343 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8344 } 8345 8346 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8347 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8348 } 8349 8350 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8351 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8352 } 8353 8354 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8355 OptionalImmIndexMap OptionalIdx; 8356 8357 unsigned Opc = Inst.getOpcode(); 8358 bool HasModifiers = 8359 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8360 unsigned I = 1; 8361 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8362 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8363 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8364 } 8365 8366 int Fi = 0; 8367 for (unsigned E = Operands.size(); I != E; ++I) { 8368 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8369 MCOI::TIED_TO); 8370 if (TiedTo != -1) { 8371 assert((unsigned)TiedTo < Inst.getNumOperands()); 8372 // handle tied old or src2 for MAC instructions 8373 Inst.addOperand(Inst.getOperand(TiedTo)); 8374 } 8375 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8376 // Add the register arguments 8377 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8378 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8379 // Skip it. 
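      // E.g. in a VOP2b dpp form such as "v_add_u32 v0, vcc, v1, v2 quad_perm:[0,1,2,3]",
      // the carry-out "vcc" is implied by the opcode and gets no MCInst operand.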
8380 continue; 8381 } 8382 8383 if (IsDPP8) { 8384 if (Op.isDPP8()) { 8385 Op.addImmOperands(Inst, 1); 8386 } else if (HasModifiers && 8387 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8388 Op.addRegWithFPInputModsOperands(Inst, 2); 8389 } else if (Op.isFI()) { 8390 Fi = Op.getImm(); 8391 } else if (Op.isReg()) { 8392 Op.addRegOperands(Inst, 1); 8393 } else { 8394 llvm_unreachable("Invalid operand type"); 8395 } 8396 } else { 8397 if (HasModifiers && 8398 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8399 Op.addRegWithFPInputModsOperands(Inst, 2); 8400 } else if (Op.isReg()) { 8401 Op.addRegOperands(Inst, 1); 8402 } else if (Op.isDPPCtrl()) { 8403 Op.addImmOperands(Inst, 1); 8404 } else if (Op.isImm()) { 8405 // Handle optional arguments 8406 OptionalIdx[Op.getImmTy()] = I; 8407 } else { 8408 llvm_unreachable("Invalid operand type"); 8409 } 8410 } 8411 } 8412 8413 if (IsDPP8) { 8414 using namespace llvm::AMDGPU::DPP; 8415 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8416 } else { 8417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8418 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8420 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8421 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8422 } 8423 } 8424 } 8425 8426 //===----------------------------------------------------------------------===// 8427 // sdwa 8428 //===----------------------------------------------------------------------===// 8429 8430 OperandMatchResultTy 8431 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8432 AMDGPUOperand::ImmTy Type) { 8433 using namespace llvm::AMDGPU::SDWA; 8434 8435 SMLoc S = getLoc(); 8436 StringRef Value; 8437 OperandMatchResultTy res; 8438 8439 SMLoc StringLoc; 8440 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8441 if (res != MatchOperand_Success) { 8442 return res; 8443 } 8444 8445 int64_t Int; 8446 Int = StringSwitch<int64_t>(Value) 8447 .Case("BYTE_0", SdwaSel::BYTE_0) 8448 .Case("BYTE_1", SdwaSel::BYTE_1) 8449 .Case("BYTE_2", SdwaSel::BYTE_2) 8450 .Case("BYTE_3", SdwaSel::BYTE_3) 8451 .Case("WORD_0", SdwaSel::WORD_0) 8452 .Case("WORD_1", SdwaSel::WORD_1) 8453 .Case("DWORD", SdwaSel::DWORD) 8454 .Default(0xffffffff); 8455 8456 if (Int == 0xffffffff) { 8457 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8458 return MatchOperand_ParseFail; 8459 } 8460 8461 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8462 return MatchOperand_Success; 8463 } 8464 8465 OperandMatchResultTy 8466 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8467 using namespace llvm::AMDGPU::SDWA; 8468 8469 SMLoc S = getLoc(); 8470 StringRef Value; 8471 OperandMatchResultTy res; 8472 8473 SMLoc StringLoc; 8474 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8475 if (res != MatchOperand_Success) { 8476 return res; 8477 } 8478 8479 int64_t Int; 8480 Int = StringSwitch<int64_t>(Value) 8481 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8482 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8483 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8484 .Default(0xffffffff); 8485 8486 if (Int == 0xffffffff) { 8487 Error(StringLoc, "invalid dst_unused value"); 8488 return MatchOperand_ParseFail; 8489 } 8490 8491 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8492 return MatchOperand_Success; 8493 } 8494 8495 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8496 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8497 } 8498 8499 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8500 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8501 } 8502 8503 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8504 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8505 } 8506 8507 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8508 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8509 } 8510 8511 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8512 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8513 } 8514 8515 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8516 uint64_t BasicInstType, 8517 bool SkipDstVcc, 8518 bool SkipSrcVcc) { 8519 using namespace llvm::AMDGPU::SDWA; 8520 8521 OptionalImmIndexMap OptionalIdx; 8522 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8523 bool SkippedVcc = false; 8524 8525 unsigned I = 1; 8526 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8527 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8528 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8529 } 8530 8531 for (unsigned E = Operands.size(); I != E; ++I) { 8532 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8533 if (SkipVcc && !SkippedVcc && Op.isReg() && 8534 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8535 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8536 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8537 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8538 // Skip VCC only if we didn't skip it on previous iteration. 8539 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
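      // That is, Inst holds 1 operand (just the dst) when the dst "vcc" is seen,
      // and 5 operands (dst plus {modifiers, reg} pairs for src0 and src1) when
      // the source "vcc" is seen, which is what the checks below test.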
8540 if (BasicInstType == SIInstrFlags::VOP2 && 8541 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8542 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8543 SkippedVcc = true; 8544 continue; 8545 } else if (BasicInstType == SIInstrFlags::VOPC && 8546 Inst.getNumOperands() == 0) { 8547 SkippedVcc = true; 8548 continue; 8549 } 8550 } 8551 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8552 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8553 } else if (Op.isImm()) { 8554 // Handle optional arguments 8555 OptionalIdx[Op.getImmTy()] = I; 8556 } else { 8557 llvm_unreachable("Invalid operand type"); 8558 } 8559 SkippedVcc = false; 8560 } 8561 8562 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8563 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8564 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8565 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8566 switch (BasicInstType) { 8567 case SIInstrFlags::VOP1: 8568 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8569 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8570 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8571 } 8572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8575 break; 8576 8577 case SIInstrFlags::VOP2: 8578 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8579 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8580 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8581 } 8582 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8584 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8586 break; 8587 8588 case SIInstrFlags::VOPC: 8589 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8590 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8591 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8592 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8593 break; 8594 8595 default: 8596 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8597 } 8598 } 8599 8600 // special case v_mac_{f16, f32}: 8601 // it has src2 register operand that is tied to dst operand 8602 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8603 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8604 auto it = Inst.begin(); 8605 std::advance( 8606 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8607 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8608 } 8609 } 8610 8611 //===----------------------------------------------------------------------===// 8612 // mAI 8613 //===----------------------------------------------------------------------===// 8614 8615 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8616 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8617 } 8618 8619 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8620 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8621 } 8622 8623 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8624 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8625 } 8626 8627 /// Force static initialization. 8628 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8629 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8630 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8631 } 8632 8633 #define GET_REGISTER_MATCHER 8634 #define GET_MATCHER_IMPLEMENTATION 8635 #define GET_MNEMONIC_SPELL_CHECKER 8636 #define GET_MNEMONIC_CHECKER 8637 #include "AMDGPUGenAsmMatcher.inc" 8638 8639 // This function should be defined after auto-generated include so that we have 8640 // MatchClassKind enum defined 8641 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8642 unsigned Kind) { 8643 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8644 // But MatchInstructionImpl() expects to meet token and fails to validate 8645 // operand. This method checks if we are given immediate operand but expect to 8646 // get corresponding token. 8647 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8648 switch (Kind) { 8649 case MCK_addr64: 8650 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8651 case MCK_gds: 8652 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8653 case MCK_lds: 8654 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8655 case MCK_idxen: 8656 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8657 case MCK_offen: 8658 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8659 case MCK_SSrcB32: 8660 // When operands have expression values, they will return true for isToken, 8661 // because it is not possible to distinguish between a token and an 8662 // expression at parse time. MatchInstructionImpl() will always try to 8663 // match an operand as a token, when isToken returns true, and when the 8664 // name of the expression is not a valid token, the match will fail, 8665 // so we need to handle it here. 8666 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8667 case MCK_SSrcF32: 8668 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8669 case MCK_SoppBrTarget: 8670 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8671 case MCK_VReg32OrOff: 8672 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8673 case MCK_InterpSlot: 8674 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8675 case MCK_Attr: 8676 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8677 case MCK_AttrChan: 8678 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8679 case MCK_ImmSMEMOffset: 8680 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8681 case MCK_SReg_64: 8682 case MCK_SReg_64_XEXEC: 8683 // Null is defined as a 32-bit register but 8684 // it should also be enabled with 64-bit operands. 8685 // The following code enables it for SReg_64 operands 8686 // used as source and destination. Remaining source 8687 // operands are handled in isInlinableImm. 8688 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8689 default: 8690 return Match_InvalidOperand; 8691 } 8692 } 8693 8694 //===----------------------------------------------------------------------===// 8695 // endpgm 8696 //===----------------------------------------------------------------------===// 8697 8698 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8699 SMLoc S = getLoc(); 8700 int64_t Imm = 0; 8701 8702 if (!parseExpr(Imm)) { 8703 // The operand is optional, if not present default to 0 8704 Imm = 0; 8705 } 8706 8707 if (!isUInt<16>(Imm)) { 8708 Error(S, "expected a 16-bit value"); 8709 return MatchOperand_ParseFail; 8710 } 8711 8712 Operands.push_back( 8713 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8714 return MatchOperand_Success; 8715 } 8716 8717 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8718