1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/BinaryFormat/ELF.h" 24 #include "llvm/MC/MCAsmInfo.h" 25 #include "llvm/MC/MCContext.h" 26 #include "llvm/MC/MCExpr.h" 27 #include "llvm/MC/MCInst.h" 28 #include "llvm/MC/MCParser/MCAsmLexer.h" 29 #include "llvm/MC/MCParser/MCAsmParser.h" 30 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 31 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 32 #include "llvm/MC/MCSymbol.h" 33 #include "llvm/MC/TargetRegistry.h" 34 #include "llvm/Support/AMDGPUMetadata.h" 35 #include "llvm/Support/AMDHSAKernelDescriptor.h" 36 #include "llvm/Support/Casting.h" 37 #include "llvm/Support/MachineValueType.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/TargetParser.h" 40 41 using namespace llvm; 42 using namespace llvm::AMDGPU; 43 using namespace llvm::amdhsa; 44 45 namespace { 46 47 class AMDGPUAsmParser; 48 49 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 50 51 //===----------------------------------------------------------------------===// 52 // Operand 53 //===----------------------------------------------------------------------===// 54 55 class AMDGPUOperand : public MCParsedAsmOperand { 56 enum KindTy { 57 Token, 58 Immediate, 59 Register, 60 Expression 61 } Kind; 62 63 SMLoc StartLoc, EndLoc; 64 const AMDGPUAsmParser *AsmParser; 65 66 public: 67 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 68 : Kind(Kind_), AsmParser(AsmParser_) {} 69 70 using Ptr = std::unique_ptr<AMDGPUOperand>; 71 72 struct Modifiers { 73 bool Abs = false; 74 bool Neg = false; 75 bool Sext = false; 76 77 bool hasFPModifiers() const { return Abs || Neg; } 78 bool hasIntModifiers() const { return Sext; } 79 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 80 81 int64_t getFPModifiersOperand() const { 82 int64_t Operand = 0; 83 Operand |= Abs ? SISrcMods::ABS : 0u; 84 Operand |= Neg ? SISrcMods::NEG : 0u; 85 return Operand; 86 } 87 88 int64_t getIntModifiersOperand() const { 89 int64_t Operand = 0; 90 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
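    // For example, a trailing 'gds' modifier may reach us wrapped in an
    // MCSymbolRefExpr rather than as a plain token; treating the symbol name
    // as the token (see getToken() below) lets such operands still match
    // token operand classes.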
207 return isSymbolRefExpr(); 208 } 209 210 bool isSymbolRefExpr() const { 211 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 212 } 213 214 bool isImm() const override { 215 return Kind == Immediate; 216 } 217 218 void setImmKindNone() const { 219 assert(isImm()); 220 Imm.Kind = ImmKindTyNone; 221 } 222 223 void setImmKindLiteral() const { 224 assert(isImm()); 225 Imm.Kind = ImmKindTyLiteral; 226 } 227 228 void setImmKindConst() const { 229 assert(isImm()); 230 Imm.Kind = ImmKindTyConst; 231 } 232 233 bool IsImmKindLiteral() const { 234 return isImm() && Imm.Kind == ImmKindTyLiteral; 235 } 236 237 bool isImmKindConst() const { 238 return isImm() && Imm.Kind == ImmKindTyConst; 239 } 240 241 bool isInlinableImm(MVT type) const; 242 bool isLiteralImm(MVT type) const; 243 244 bool isRegKind() const { 245 return Kind == Register; 246 } 247 248 bool isReg() const override { 249 return isRegKind() && !hasModifiers(); 250 } 251 252 bool isRegOrInline(unsigned RCID, MVT type) const { 253 return isRegClass(RCID) || isInlinableImm(type); 254 } 255 256 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 257 return isRegOrInline(RCID, type) || isLiteralImm(type); 258 } 259 260 bool isRegOrImmWithInt16InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 262 } 263 264 bool isRegOrImmWithInt32InputMods() const { 265 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 266 } 267 268 bool isRegOrImmWithInt64InputMods() const { 269 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 270 } 271 272 bool isRegOrImmWithFP16InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 274 } 275 276 bool isRegOrImmWithFP32InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 278 } 279 280 bool isRegOrImmWithFP64InputMods() const { 281 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 282 } 283 284 bool isVReg() const { 285 return isRegClass(AMDGPU::VGPR_32RegClassID) || 286 isRegClass(AMDGPU::VReg_64RegClassID) || 287 isRegClass(AMDGPU::VReg_96RegClassID) || 288 isRegClass(AMDGPU::VReg_128RegClassID) || 289 isRegClass(AMDGPU::VReg_160RegClassID) || 290 isRegClass(AMDGPU::VReg_192RegClassID) || 291 isRegClass(AMDGPU::VReg_256RegClassID) || 292 isRegClass(AMDGPU::VReg_512RegClassID) || 293 isRegClass(AMDGPU::VReg_1024RegClassID); 294 } 295 296 bool isVReg32() const { 297 return isRegClass(AMDGPU::VGPR_32RegClassID); 298 } 299 300 bool isVReg32OrOff() const { 301 return isOff() || isVReg32(); 302 } 303 304 bool isNull() const { 305 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 306 } 307 308 bool isVRegWithInputMods() const; 309 310 bool isSDWAOperand(MVT type) const; 311 bool isSDWAFP16Operand() const; 312 bool isSDWAFP32Operand() const; 313 bool isSDWAInt16Operand() const; 314 bool isSDWAInt32Operand() const; 315 316 bool isImmTy(ImmTy ImmT) const { 317 return isImm() && Imm.Type == ImmT; 318 } 319 320 bool isImmModifier() const { 321 return isImm() && Imm.Type != ImmTyNone; 322 } 323 324 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 325 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 326 bool isDMask() const { return isImmTy(ImmTyDMask); } 327 bool isDim() const { return isImmTy(ImmTyDim); } 328 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 329 bool isDA() const { return isImmTy(ImmTyDA); } 330 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 331 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 332 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 333 bool isOff() const { return isImmTy(ImmTyOff); } 334 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 335 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 336 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 337 bool isOffen() const { return isImmTy(ImmTyOffen); } 338 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 339 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 340 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 341 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 342 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 343 344 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 345 bool isGDS() const { return isImmTy(ImmTyGDS); } 346 bool isLDS() const { return isImmTy(ImmTyLDS); } 347 bool isCPol() const { return isImmTy(ImmTyCPol); } 348 bool isSWZ() const { return isImmTy(ImmTySWZ); } 349 bool isTFE() const { return isImmTy(ImmTyTFE); } 350 bool isD16() const { return isImmTy(ImmTyD16); } 351 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 352 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 353 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 354 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 355 bool isFI() const { return isImmTy(ImmTyDppFi); } 356 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 357 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 358 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 359 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 360 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 361 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 362 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 363 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 364 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 365 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 366 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 367 bool isHigh() const { return isImmTy(ImmTyHigh); } 368 369 bool isMod() const { 370 return isClampSI() || isOModSI(); 371 } 372 373 bool isRegOrImm() const { 374 return isReg() || isImm(); 375 } 376 377 bool isRegClass(unsigned RCID) const; 378 379 bool isInlineValue() const; 380 381 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 382 return isRegOrInline(RCID, type) && !hasModifiers(); 383 } 384 385 bool isSCSrcB16() const { 386 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 387 } 388 389 bool isSCSrcV2B16() const { 390 return isSCSrcB16(); 391 } 392 393 bool isSCSrcB32() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 395 } 396 397 bool isSCSrcB64() const { 398 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 399 } 400 401 bool isBoolReg() const; 402 403 bool isSCSrcF16() const { 404 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 405 } 406 407 bool isSCSrcV2F16() const { 408 return isSCSrcF16(); 409 } 410 411 bool isSCSrcF32() const { 412 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 413 } 414 415 bool isSCSrcF64() const { 416 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 417 } 418 419 bool isSSrcB32() const { 420 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 421 } 422 423 bool isSSrcB16() const { 424 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 425 } 426 427 bool isSSrcV2B16() const { 428 llvm_unreachable("cannot happen"); 429 return isSSrcB16(); 430 } 431 432 bool isSSrcB64() const { 433 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 434 // See isVSrc64(). 435 return isSCSrcB64() || isLiteralImm(MVT::i64); 436 } 437 438 bool isSSrcF32() const { 439 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 440 } 441 442 bool isSSrcF64() const { 443 return isSCSrcB64() || isLiteralImm(MVT::f64); 444 } 445 446 bool isSSrcF16() const { 447 return isSCSrcB16() || isLiteralImm(MVT::f16); 448 } 449 450 bool isSSrcV2F16() const { 451 llvm_unreachable("cannot happen"); 452 return isSSrcF16(); 453 } 454 455 bool isSSrcV2FP32() const { 456 llvm_unreachable("cannot happen"); 457 return isSSrcF32(); 458 } 459 460 bool isSCSrcV2FP32() const { 461 llvm_unreachable("cannot happen"); 462 return isSCSrcF32(); 463 } 464 465 bool isSSrcV2INT32() const { 466 llvm_unreachable("cannot happen"); 467 return isSSrcB32(); 468 } 469 470 bool isSCSrcV2INT32() const { 471 llvm_unreachable("cannot happen"); 472 return isSCSrcB32(); 473 } 474 475 bool isSSrcOrLdsB32() const { 476 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 477 isLiteralImm(MVT::i32) || isExpr(); 478 } 479 480 bool isVCSrcB32() const { 481 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 482 } 483 484 bool isVCSrcB64() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 486 } 487 488 bool isVCSrcB16() const { 489 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 490 } 491 492 bool isVCSrcV2B16() const { 493 return isVCSrcB16(); 494 } 495 496 bool isVCSrcF32() const { 497 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 498 } 499 500 bool isVCSrcF64() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 502 } 503 504 bool isVCSrcF16() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 506 } 507 508 bool isVCSrcV2F16() const { 509 return isVCSrcF16(); 510 } 511 512 bool isVSrcB32() const { 513 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 514 } 515 516 bool isVSrcB64() const { 517 return isVCSrcF64() || isLiteralImm(MVT::i64); 518 } 519 520 bool isVSrcB16() const { 521 return isVCSrcB16() || isLiteralImm(MVT::i16); 522 } 523 524 bool isVSrcV2B16() const { 525 return isVSrcB16() || isLiteralImm(MVT::v2i16); 526 } 527 528 bool isVCSrcV2FP32() const { 529 return isVCSrcF64(); 530 } 531 532 bool isVSrcV2FP32() const { 533 return isVSrcF64() || isLiteralImm(MVT::v2f32); 534 } 535 536 bool isVCSrcV2INT32() const { 537 return isVCSrcB64(); 538 } 539 540 bool isVSrcV2INT32() const { 541 return isVSrcB64() || isLiteralImm(MVT::v2i32); 542 } 543 544 bool isVSrcF32() const { 545 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 546 } 547 548 bool isVSrcF64() const { 549 return isVCSrcF64() || isLiteralImm(MVT::f64); 550 } 551 552 bool isVSrcF16() const { 553 return isVCSrcF16() || isLiteralImm(MVT::f16); 554 } 555 556 bool isVSrcV2F16() const { 557 return isVSrcF16() || isLiteralImm(MVT::v2f16); 558 } 559 560 bool isVISrcB32() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 562 } 563 564 bool isVISrcB16() const { 565 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 566 } 567 568 bool isVISrcV2B16() const { 569 return isVISrcB16(); 570 } 571 572 bool isVISrcF32() const { 573 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 574 } 575 576 
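  // Reading aid for the source-operand predicates in this block (inferred
  // from their definitions, not a separate specification): the SCSrc*/VCSrc*
  // forms accept a register of the named class or an inline constant, the
  // SSrc*/VSrc* forms additionally accept literal immediates, and the
  // VISrc*/AISrc* forms restrict the register operand to VGPR/AGPR classes of
  // the given width while still allowing inline constants. The trailing B/F
  // plus bit width give the value type used for the immediate checks.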
bool isVISrcF16() const { 577 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 578 } 579 580 bool isVISrcV2F16() const { 581 return isVISrcF16() || isVISrcB32(); 582 } 583 584 bool isVISrc_64B64() const { 585 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 586 } 587 588 bool isVISrc_64F64() const { 589 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 590 } 591 592 bool isVISrc_64V2FP32() const { 593 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 594 } 595 596 bool isVISrc_64V2INT32() const { 597 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 598 } 599 600 bool isVISrc_256B64() const { 601 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 602 } 603 604 bool isVISrc_256F64() const { 605 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 606 } 607 608 bool isVISrc_128B16() const { 609 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 610 } 611 612 bool isVISrc_128V2B16() const { 613 return isVISrc_128B16(); 614 } 615 616 bool isVISrc_128B32() const { 617 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 618 } 619 620 bool isVISrc_128F32() const { 621 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 622 } 623 624 bool isVISrc_256V2FP32() const { 625 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 626 } 627 628 bool isVISrc_256V2INT32() const { 629 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 630 } 631 632 bool isVISrc_512B32() const { 633 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 634 } 635 636 bool isVISrc_512B16() const { 637 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 638 } 639 640 bool isVISrc_512V2B16() const { 641 return isVISrc_512B16(); 642 } 643 644 bool isVISrc_512F32() const { 645 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 646 } 647 648 bool isVISrc_512F16() const { 649 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 650 } 651 652 bool isVISrc_512V2F16() const { 653 return isVISrc_512F16() || isVISrc_512B32(); 654 } 655 656 bool isVISrc_1024B32() const { 657 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 658 } 659 660 bool isVISrc_1024B16() const { 661 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 662 } 663 664 bool isVISrc_1024V2B16() const { 665 return isVISrc_1024B16(); 666 } 667 668 bool isVISrc_1024F32() const { 669 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 670 } 671 672 bool isVISrc_1024F16() const { 673 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 674 } 675 676 bool isVISrc_1024V2F16() const { 677 return isVISrc_1024F16() || isVISrc_1024B32(); 678 } 679 680 bool isAISrcB32() const { 681 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 682 } 683 684 bool isAISrcB16() const { 685 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 686 } 687 688 bool isAISrcV2B16() const { 689 return isAISrcB16(); 690 } 691 692 bool isAISrcF32() const { 693 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 694 } 695 696 bool isAISrcF16() const { 697 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 698 } 699 700 bool isAISrcV2F16() const { 701 return isAISrcF16() || isAISrcB32(); 702 } 703 704 bool isAISrc_64B64() const { 705 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 706 } 707 708 bool isAISrc_64F64() const { 709 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 710 } 711 712 bool isAISrc_128B32() const { 713 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 714 } 715 716 bool isAISrc_128B16() const { 717 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 718 } 719 720 bool isAISrc_128V2B16() const { 721 return isAISrc_128B16(); 722 } 723 724 bool isAISrc_128F32() const { 725 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 726 } 727 728 bool isAISrc_128F16() const { 729 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 730 } 731 732 bool isAISrc_128V2F16() const { 733 return isAISrc_128F16() || isAISrc_128B32(); 734 } 735 736 bool isVISrc_128F16() const { 737 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 738 } 739 740 bool isVISrc_128V2F16() const { 741 return isVISrc_128F16() || isVISrc_128B32(); 742 } 743 744 bool isAISrc_256B64() const { 745 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 746 } 747 748 bool isAISrc_256F64() const { 749 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 750 } 751 752 bool isAISrc_512B32() const { 753 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 754 } 755 756 bool isAISrc_512B16() const { 757 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 758 } 759 760 bool isAISrc_512V2B16() const { 761 return isAISrc_512B16(); 762 } 763 764 bool isAISrc_512F32() const { 765 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 766 } 767 768 bool isAISrc_512F16() const { 769 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 770 } 771 772 bool isAISrc_512V2F16() const { 773 return isAISrc_512F16() || isAISrc_512B32(); 774 } 775 776 bool isAISrc_1024B32() const { 777 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 778 } 779 780 bool isAISrc_1024B16() const { 781 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 782 } 783 784 bool isAISrc_1024V2B16() const { 785 return isAISrc_1024B16(); 786 } 787 788 bool isAISrc_1024F32() const { 789 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 790 } 791 792 bool isAISrc_1024F16() const { 793 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 794 } 795 796 bool isAISrc_1024V2F16() const { 797 return isAISrc_1024F16() || isAISrc_1024B32(); 798 } 799 800 bool isKImmFP32() const { 801 return isLiteralImm(MVT::f32); 802 } 803 804 bool isKImmFP16() const { 805 return isLiteralImm(MVT::f16); 806 } 807 808 bool isMem() const override { 809 return false; 810 } 811 812 bool isExpr() const { 813 return Kind == Expression; 814 } 815 816 bool isSoppBrTarget() const { 817 return isExpr() || isImm(); 818 } 819 820 bool isSWaitCnt() const; 821 bool isHwreg() const; 822 bool isSendMsg() const; 823 bool isSwizzle() const; 824 bool isSMRDOffset8() const; 825 bool isSMEMOffset() const; 826 bool isSMRDLiteralOffset() const; 827 bool isDPP8() const; 828 bool isDPPCtrl() const; 829 bool isBLGP() const; 830 bool isCBSZ() const; 831 bool isABID() const; 832 bool isGPRIdxMode() const; 833 bool isS16Imm() const; 834 bool isU16Imm() const; 835 bool isEndpgm() const; 836 837 StringRef getExpressionAsToken() const { 838 assert(isExpr()); 839 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 840 return S->getSymbol().getName(); 841 } 842 843 StringRef getToken() const { 844 assert(isToken()); 845 846 if (Kind == Expression) 847 return getExpressionAsToken(); 848 849 return StringRef(Tok.Data, Tok.Length); 
850 } 851 852 int64_t getImm() const { 853 assert(isImm()); 854 return Imm.Val; 855 } 856 857 void setImm(int64_t Val) { 858 assert(isImm()); 859 Imm.Val = Val; 860 } 861 862 ImmTy getImmTy() const { 863 assert(isImm()); 864 return Imm.Type; 865 } 866 867 unsigned getReg() const override { 868 assert(isRegKind()); 869 return Reg.RegNo; 870 } 871 872 SMLoc getStartLoc() const override { 873 return StartLoc; 874 } 875 876 SMLoc getEndLoc() const override { 877 return EndLoc; 878 } 879 880 SMRange getLocRange() const { 881 return SMRange(StartLoc, EndLoc); 882 } 883 884 Modifiers getModifiers() const { 885 assert(isRegKind() || isImmTy(ImmTyNone)); 886 return isRegKind() ? Reg.Mods : Imm.Mods; 887 } 888 889 void setModifiers(Modifiers Mods) { 890 assert(isRegKind() || isImmTy(ImmTyNone)); 891 if (isRegKind()) 892 Reg.Mods = Mods; 893 else 894 Imm.Mods = Mods; 895 } 896 897 bool hasModifiers() const { 898 return getModifiers().hasModifiers(); 899 } 900 901 bool hasFPModifiers() const { 902 return getModifiers().hasFPModifiers(); 903 } 904 905 bool hasIntModifiers() const { 906 return getModifiers().hasIntModifiers(); 907 } 908 909 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 910 911 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 912 913 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 914 915 template <unsigned Bitwidth> 916 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 917 918 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 919 addKImmFPOperands<16>(Inst, N); 920 } 921 922 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 923 addKImmFPOperands<32>(Inst, N); 924 } 925 926 void addRegOperands(MCInst &Inst, unsigned N) const; 927 928 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 929 addRegOperands(Inst, N); 930 } 931 932 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 933 if (isRegKind()) 934 addRegOperands(Inst, N); 935 else if (isExpr()) 936 Inst.addOperand(MCOperand::createExpr(Expr)); 937 else 938 addImmOperands(Inst, N); 939 } 940 941 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 942 Modifiers Mods = getModifiers(); 943 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 944 if (isRegKind()) { 945 addRegOperands(Inst, N); 946 } else { 947 addImmOperands(Inst, N, false); 948 } 949 } 950 951 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 952 assert(!hasIntModifiers()); 953 addRegOrImmWithInputModsOperands(Inst, N); 954 } 955 956 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 957 assert(!hasFPModifiers()); 958 addRegOrImmWithInputModsOperands(Inst, N); 959 } 960 961 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 962 Modifiers Mods = getModifiers(); 963 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 964 assert(isRegKind()); 965 addRegOperands(Inst, N); 966 } 967 968 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 969 assert(!hasIntModifiers()); 970 addRegWithInputModsOperands(Inst, N); 971 } 972 973 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 974 assert(!hasFPModifiers()); 975 addRegWithInputModsOperands(Inst, N); 976 } 977 978 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 979 if (isImm()) 980 addImmOperands(Inst, N); 981 else { 982 assert(isExpr()); 983 Inst.addOperand(MCOperand::createExpr(Expr)); 984 } 985 } 986 987 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 988 switch (Type) { 989 case ImmTyNone: OS << "None"; break; 990 case ImmTyGDS: OS << "GDS"; break; 991 case ImmTyLDS: OS << "LDS"; break; 992 case ImmTyOffen: OS << "Offen"; break; 993 case ImmTyIdxen: OS << "Idxen"; break; 994 case ImmTyAddr64: OS << "Addr64"; break; 995 case ImmTyOffset: OS << "Offset"; break; 996 case ImmTyInstOffset: OS << "InstOffset"; break; 997 case ImmTyOffset0: OS << "Offset0"; break; 998 case ImmTyOffset1: OS << "Offset1"; break; 999 case ImmTyCPol: OS << "CPol"; break; 1000 case ImmTySWZ: OS << "SWZ"; break; 1001 case ImmTyTFE: OS << "TFE"; break; 1002 case ImmTyD16: OS << "D16"; break; 1003 case ImmTyFORMAT: OS << "FORMAT"; break; 1004 case ImmTyClampSI: OS << "ClampSI"; break; 1005 case ImmTyOModSI: OS << "OModSI"; break; 1006 case ImmTyDPP8: OS << "DPP8"; break; 1007 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1008 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1009 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1010 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1011 case ImmTyDppFi: OS << "FI"; break; 1012 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1013 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1014 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1015 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1016 case ImmTyDMask: OS << "DMask"; break; 1017 case ImmTyDim: OS << "Dim"; break; 1018 case ImmTyUNorm: OS << "UNorm"; break; 1019 case ImmTyDA: OS << "DA"; break; 1020 case ImmTyR128A16: OS << "R128A16"; break; 1021 case ImmTyA16: OS << "A16"; break; 1022 case ImmTyLWE: OS << "LWE"; break; 1023 case ImmTyOff: OS << "Off"; break; 1024 case ImmTyExpTgt: OS << "ExpTgt"; break; 1025 case ImmTyExpCompr: OS << "ExpCompr"; break; 1026 case ImmTyExpVM: OS << "ExpVM"; break; 1027 case ImmTyHwreg: OS << "Hwreg"; break; 1028 case ImmTySendMsg: OS << "SendMsg"; break; 1029 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1030 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1031 case ImmTyAttrChan: OS << "AttrChan"; break; 1032 case ImmTyOpSel: OS << "OpSel"; break; 1033 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1034 case ImmTyNegLo: OS << "NegLo"; break; 1035 case ImmTyNegHi: OS << "NegHi"; break; 1036 case ImmTySwizzle: OS << "Swizzle"; break; 1037 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1038 case ImmTyHigh: OS << "High"; break; 1039 case ImmTyBLGP: OS << "BLGP"; break; 1040 case ImmTyCBSZ: OS << "CBSZ"; break; 1041 case ImmTyABID: OS << "ABID"; break; 1042 case ImmTyEndpgm: OS << "Endpgm"; break; 1043 } 1044 } 1045 1046 void print(raw_ostream &OS) const override { 1047 switch (Kind) { 1048 case Register: 1049 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1050 break; 1051 case Immediate: 1052 OS << '<' << getImm(); 1053 if (getImmTy() != ImmTyNone) { 1054 OS << " type: "; printImmTy(OS, getImmTy()); 1055 } 1056 OS << " mods: " << Imm.Mods << '>'; 1057 break; 1058 case Token: 1059 OS << '\'' << getToken() << '\''; 1060 break; 1061 case Expression: 1062 OS << "<expr " << *Expr << '>'; 1063 break; 1064 } 1065 } 1066 1067 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1068 int64_t Val, SMLoc Loc, 1069 ImmTy Type = ImmTyNone, 1070 bool IsFPImm = false) { 1071 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1072 Op->Imm.Val = Val; 1073 Op->Imm.IsFPImm = IsFPImm; 1074 Op->Imm.Kind = ImmKindTyNone; 1075 Op->Imm.Type = Type; 1076 Op->Imm.Mods = Modifiers(); 1077 Op->StartLoc = Loc; 1078 Op->EndLoc = Loc; 1079 return Op; 1080 } 1081 1082 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1083 StringRef Str, SMLoc Loc, 1084 bool HasExplicitEncodingSize = true) { 1085 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1086 Res->Tok.Data = Str.data(); 1087 Res->Tok.Length = Str.size(); 1088 Res->StartLoc = Loc; 1089 Res->EndLoc = Loc; 1090 return Res; 1091 } 1092 1093 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1094 unsigned RegNo, SMLoc S, 1095 SMLoc E) { 1096 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1097 Op->Reg.RegNo = RegNo; 1098 Op->Reg.Mods = Modifiers(); 1099 Op->StartLoc = S; 1100 Op->EndLoc = E; 1101 return Op; 1102 } 1103 1104 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1105 const class MCExpr *Expr, SMLoc S) { 1106 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1107 Op->Expr = Expr; 1108 Op->StartLoc = S; 1109 Op->EndLoc = S; 1110 return Op; 1111 } 1112 }; 1113 1114 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1115 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1116 return OS; 1117 } 1118 1119 //===----------------------------------------------------------------------===// 1120 // AsmParser 1121 //===----------------------------------------------------------------------===// 1122 1123 // Holds info related to the current kernel, e.g. count of SGPRs used. 1124 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1125 // .amdgpu_hsa_kernel or at EOF. 1126 class KernelScopeInfo { 1127 int SgprIndexUnusedMin = -1; 1128 int VgprIndexUnusedMin = -1; 1129 int AgprIndexUnusedMin = -1; 1130 MCContext *Ctx = nullptr; 1131 MCSubtargetInfo const *MSTI = nullptr; 1132 1133 void usesSgprAt(int i) { 1134 if (i >= SgprIndexUnusedMin) { 1135 SgprIndexUnusedMin = ++i; 1136 if (Ctx) { 1137 MCSymbol* const Sym = 1138 Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 void usesVgprAt(int i) { 1145 if (i >= VgprIndexUnusedMin) { 1146 VgprIndexUnusedMin = ++i; 1147 if (Ctx) { 1148 MCSymbol* const Sym = 1149 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1150 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1151 VgprIndexUnusedMin); 1152 Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1153 } 1154 } 1155 } 1156 1157 void usesAgprAt(int i) { 1158 // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction 1159 if (!hasMAIInsts(*MSTI)) 1160 return; 1161 1162 if (i >= AgprIndexUnusedMin) { 1163 AgprIndexUnusedMin = ++i; 1164 if (Ctx) { 1165 MCSymbol* const Sym = 1166 Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count")); 1167 Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx)); 1168 1169 // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a) 1170 MCSymbol* const vSym = 1171 Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1172 int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin, 1173 VgprIndexUnusedMin); 1174 vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx)); 1175 } 1176 } 1177 } 1178 1179 public: 1180 KernelScopeInfo() = default; 1181 1182 void initialize(MCContext &Context) { 1183 Ctx = &Context; 1184 MSTI = Ctx->getSubtargetInfo(); 1185 1186 usesSgprAt(SgprIndexUnusedMin = -1); 1187 usesVgprAt(VgprIndexUnusedMin = -1); 1188 if (hasMAIInsts(*MSTI)) { 1189 usesAgprAt(AgprIndexUnusedMin = -1); 1190 } 1191 } 1192 1193 void 
usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, 1194 unsigned RegWidth) { 1195 switch (RegKind) { 1196 case IS_SGPR: 1197 usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1198 break; 1199 case IS_AGPR: 1200 usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1201 break; 1202 case IS_VGPR: 1203 usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1); 1204 break; 1205 default: 1206 break; 1207 } 1208 } 1209 }; 1210 1211 class AMDGPUAsmParser : public MCTargetAsmParser { 1212 MCAsmParser &Parser; 1213 1214 // Number of extra operands parsed after the first optional operand. 1215 // This may be necessary to skip hardcoded mandatory operands. 1216 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1217 1218 unsigned ForcedEncodingSize = 0; 1219 bool ForcedDPP = false; 1220 bool ForcedSDWA = false; 1221 KernelScopeInfo KernelScope; 1222 unsigned CPolSeen; 1223 1224 /// @name Auto-generated Match Functions 1225 /// { 1226 1227 #define GET_ASSEMBLER_HEADER 1228 #include "AMDGPUGenAsmMatcher.inc" 1229 1230 /// } 1231 1232 private: 1233 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1234 bool OutOfRangeError(SMRange Range); 1235 /// Calculate VGPR/SGPR blocks required for given target, reserved 1236 /// registers, and user-specified NextFreeXGPR values. 1237 /// 1238 /// \param Features [in] Target features, used for bug corrections. 1239 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1240 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1241 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1242 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1243 /// descriptor field, if valid. 1244 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1245 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1246 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1247 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1248 /// \param VGPRBlocks [out] Result VGPR block count. 1249 /// \param SGPRBlocks [out] Result SGPR block count. 1250 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1251 bool FlatScrUsed, bool XNACKUsed, 1252 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1253 SMRange VGPRRange, unsigned NextFreeSGPR, 1254 SMRange SGPRRange, unsigned &VGPRBlocks, 1255 unsigned &SGPRBlocks); 1256 bool ParseDirectiveAMDGCNTarget(); 1257 bool ParseDirectiveAMDHSAKernel(); 1258 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1259 bool ParseDirectiveHSACodeObjectVersion(); 1260 bool ParseDirectiveHSACodeObjectISA(); 1261 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1262 bool ParseDirectiveAMDKernelCodeT(); 1263 // TODO: Possibly make subtargetHasRegister const. 1264 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1265 bool ParseDirectiveAMDGPUHsaKernel(); 1266 1267 bool ParseDirectiveISAVersion(); 1268 bool ParseDirectiveHSAMetadata(); 1269 bool ParseDirectivePALMetadataBegin(); 1270 bool ParseDirectivePALMetadata(); 1271 bool ParseDirectiveAMDGPULDS(); 1272 1273 /// Common code to parse out a block of text (typically YAML) between start and 1274 /// end directives. 
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
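      // The context symbols initialized below (e.g.
      // ".amdgcn.gfx_generation_number") can be queried from assembly source;
      // a hypothetical use, not taken from this file, would be:
      //   .if .amdgcn.gfx_generation_number >= 10
      //     s_nop 0
      //   .endif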
1341 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1342 MCContext &Ctx = getContext(); 1343 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1344 MCSymbol *Sym = 1345 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1346 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1347 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1348 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1349 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1350 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1351 } else { 1352 MCSymbol *Sym = 1353 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1354 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1355 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1356 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1357 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1358 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1359 } 1360 if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) { 1361 initializeGprCountSymbol(IS_VGPR); 1362 initializeGprCountSymbol(IS_SGPR); 1363 } else 1364 KernelScope.initialize(getContext()); 1365 } 1366 } 1367 1368 bool hasMIMG_R128() const { 1369 return AMDGPU::hasMIMG_R128(getSTI()); 1370 } 1371 1372 bool hasPackedD16() const { 1373 return AMDGPU::hasPackedD16(getSTI()); 1374 } 1375 1376 bool hasGFX10A16() const { 1377 return AMDGPU::hasGFX10A16(getSTI()); 1378 } 1379 1380 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1381 1382 bool isSI() const { 1383 return AMDGPU::isSI(getSTI()); 1384 } 1385 1386 bool isCI() const { 1387 return AMDGPU::isCI(getSTI()); 1388 } 1389 1390 bool isVI() const { 1391 return AMDGPU::isVI(getSTI()); 1392 } 1393 1394 bool isGFX9() const { 1395 return AMDGPU::isGFX9(getSTI()); 1396 } 1397 1398 // TODO: isGFX90A is also true for GFX940. We need to clean it. 1399 bool isGFX90A() const { 1400 return AMDGPU::isGFX90A(getSTI()); 1401 } 1402 1403 bool isGFX940() const { 1404 return AMDGPU::isGFX940(getSTI()); 1405 } 1406 1407 bool isGFX9Plus() const { 1408 return AMDGPU::isGFX9Plus(getSTI()); 1409 } 1410 1411 bool isGFX10() const { 1412 return AMDGPU::isGFX10(getSTI()); 1413 } 1414 1415 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1416 1417 bool isGFX10_BEncoding() const { 1418 return AMDGPU::isGFX10_BEncoding(getSTI()); 1419 } 1420 1421 bool hasInv2PiInlineImm() const { 1422 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1423 } 1424 1425 bool hasFlatOffsets() const { 1426 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1427 } 1428 1429 bool hasArchitectedFlatScratch() const { 1430 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1431 } 1432 1433 bool hasSGPR102_SGPR103() const { 1434 return !isVI() && !isGFX9(); 1435 } 1436 1437 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1438 1439 bool hasIntClamp() const { 1440 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1441 } 1442 1443 AMDGPUTargetStreamer &getTargetStreamer() { 1444 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1445 return static_cast<AMDGPUTargetStreamer &>(TS); 1446 } 1447 1448 const MCRegisterInfo *getMRI() const { 1449 // We need this const_cast because for some reason getContext() is not const 1450 // in MCAsmParser. 
1451 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1452 } 1453 1454 const MCInstrInfo *getMII() const { 1455 return &MII; 1456 } 1457 1458 const FeatureBitset &getFeatureBits() const { 1459 return getSTI().getFeatureBits(); 1460 } 1461 1462 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1463 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1464 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1465 1466 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1467 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1468 bool isForcedDPP() const { return ForcedDPP; } 1469 bool isForcedSDWA() const { return ForcedSDWA; } 1470 ArrayRef<unsigned> getMatchedVariants() const; 1471 StringRef getMatchedVariantName() const; 1472 1473 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1474 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1475 bool RestoreOnFailure); 1476 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1477 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1478 SMLoc &EndLoc) override; 1479 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1480 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1481 unsigned Kind) override; 1482 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1483 OperandVector &Operands, MCStreamer &Out, 1484 uint64_t &ErrorInfo, 1485 bool MatchingInlineAsm) override; 1486 bool ParseDirective(AsmToken DirectiveID) override; 1487 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1488 OperandMode Mode = OperandMode_Default); 1489 StringRef parseMnemonicSuffix(StringRef Name); 1490 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1491 SMLoc NameLoc, OperandVector &Operands) override; 1492 //bool ProcessInstruction(MCInst &Inst); 1493 1494 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1495 1496 OperandMatchResultTy 1497 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1498 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1499 bool (*ConvertResult)(int64_t &) = nullptr); 1500 1501 OperandMatchResultTy 1502 parseOperandArrayWithPrefix(const char *Prefix, 1503 OperandVector &Operands, 1504 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1505 bool (*ConvertResult)(int64_t&) = nullptr); 1506 1507 OperandMatchResultTy 1508 parseNamedBit(StringRef Name, OperandVector &Operands, 1509 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1510 OperandMatchResultTy parseCPol(OperandVector &Operands); 1511 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1512 StringRef &Value, 1513 SMLoc &StringLoc); 1514 1515 bool isModifier(); 1516 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1517 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1518 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1519 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1520 bool parseSP3NegModifier(); 1521 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1522 OperandMatchResultTy parseReg(OperandVector &Operands); 1523 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1524 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1525 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1526 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1527 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1528 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1529 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1530 OperandMatchResultTy parseUfmt(int64_t &Format); 1531 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1532 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1533 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1534 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1535 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1536 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1537 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1538 1539 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1540 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1541 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1542 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1543 1544 bool parseCnt(int64_t &IntVal); 1545 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1546 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1547 1548 private: 1549 struct OperandInfoTy { 1550 SMLoc Loc; 1551 int64_t Id; 1552 bool IsSymbolic = false; 1553 bool IsDefined = false; 1554 1555 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1556 }; 1557 1558 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1559 bool validateSendMsg(const OperandInfoTy &Msg, 1560 const OperandInfoTy &Op, 1561 const OperandInfoTy &Stream); 1562 1563 bool parseHwregBody(OperandInfoTy &HwReg, 1564 OperandInfoTy &Offset, 1565 OperandInfoTy &Width); 1566 bool validateHwreg(const OperandInfoTy &HwReg, 1567 const OperandInfoTy &Offset, 1568 const OperandInfoTy &Width); 1569 1570 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1571 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1572 SMLoc getBLGPLoc(const OperandVector &Operands) const; 1573 1574 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1575 const OperandVector &Operands) const; 1576 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1577 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1578 SMLoc getLitLoc(const OperandVector &Operands) const; 1579 SMLoc getConstLoc(const OperandVector &Operands) const; 1580 1581 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1582 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1583 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1584 bool validateSOPLiteral(const MCInst &Inst) const; 1585 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1586 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1587 bool validateIntClampSupported(const MCInst &Inst); 1588 bool validateMIMGAtomicDMask(const MCInst &Inst); 1589 bool validateMIMGGatherDMask(const MCInst &Inst); 1590 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1591 bool 
validateMIMGDataSize(const MCInst &Inst); 1592 bool validateMIMGAddrSize(const MCInst &Inst); 1593 bool validateMIMGD16(const MCInst &Inst); 1594 bool validateMIMGDim(const MCInst &Inst); 1595 bool validateMIMGMSAA(const MCInst &Inst); 1596 bool validateOpSel(const MCInst &Inst); 1597 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1598 bool validateVccOperand(unsigned Reg) const; 1599 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1600 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1601 bool validateMFMA(const MCInst &Inst, const OperandVector &Operands); 1602 bool validateAGPRLdSt(const MCInst &Inst) const; 1603 bool validateVGPRAlign(const MCInst &Inst) const; 1604 bool validateBLGP(const MCInst &Inst, const OperandVector &Operands); 1605 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1606 bool validateDivScale(const MCInst &Inst); 1607 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1608 const SMLoc &IDLoc); 1609 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1610 unsigned getConstantBusLimit(unsigned Opcode) const; 1611 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1612 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1613 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1614 1615 bool isSupportedMnemo(StringRef Mnemo, 1616 const FeatureBitset &FBS); 1617 bool isSupportedMnemo(StringRef Mnemo, 1618 const FeatureBitset &FBS, 1619 ArrayRef<unsigned> Variants); 1620 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1621 1622 bool isId(const StringRef Id) const; 1623 bool isId(const AsmToken &Token, const StringRef Id) const; 1624 bool isToken(const AsmToken::TokenKind Kind) const; 1625 bool trySkipId(const StringRef Id); 1626 bool trySkipId(const StringRef Pref, const StringRef Id); 1627 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1628 bool trySkipToken(const AsmToken::TokenKind Kind); 1629 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1630 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1631 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1632 1633 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1634 AsmToken::TokenKind getTokenKind() const; 1635 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1636 bool parseExpr(OperandVector &Operands); 1637 StringRef getTokenStr() const; 1638 AsmToken peekToken(); 1639 AsmToken getToken() const; 1640 SMLoc getLoc() const; 1641 void lex(); 1642 1643 public: 1644 void onBeginOfFile() override; 1645 1646 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1647 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1648 1649 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1650 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1651 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1652 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1653 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1654 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1655 1656 bool parseSwizzleOperand(int64_t &Op, 1657 const unsigned MinVal, 1658 const unsigned MaxVal, 1659 const StringRef ErrMsg, 1660 SMLoc &Loc); 1661 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1662 const unsigned MinVal, 1663 const unsigned MaxVal, 1664 const StringRef ErrMsg); 1665 
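  // The parseSwizzle* helpers around here accept the symbolic ds_swizzle_b32
  // offset forms in addition to a plain immediate. A sketch of the syntax
  // (illustrative, not quoted from this file):
  //   ds_swizzle_b32 v0, v1 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
  //   ds_swizzle_b32 v0, v1 offset:swizzle(SWAP, 2)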
OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1666 bool parseSwizzleOffset(int64_t &Imm); 1667 bool parseSwizzleMacro(int64_t &Imm); 1668 bool parseSwizzleQuadPerm(int64_t &Imm); 1669 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1670 bool parseSwizzleBroadcast(int64_t &Imm); 1671 bool parseSwizzleSwap(int64_t &Imm); 1672 bool parseSwizzleReverse(int64_t &Imm); 1673 1674 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1675 int64_t parseGPRIdxMacro(); 1676 1677 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1678 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1679 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1680 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1681 1682 AMDGPUOperand::Ptr defaultCPol() const; 1683 1684 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1685 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1686 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1687 AMDGPUOperand::Ptr defaultFlatOffset() const; 1688 1689 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1690 1691 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1692 OptionalImmIndexMap &OptionalIdx); 1693 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1694 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1695 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1696 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1697 OptionalImmIndexMap &OptionalIdx); 1698 1699 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1700 1701 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1702 bool IsAtomic = false); 1703 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1704 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1705 1706 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1707 1708 bool parseDimId(unsigned &Encoding); 1709 OperandMatchResultTy parseDim(OperandVector &Operands); 1710 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1711 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1712 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1713 int64_t parseDPPCtrlSel(StringRef Ctrl); 1714 int64_t parseDPPCtrlPerm(); 1715 AMDGPUOperand::Ptr defaultRowMask() const; 1716 AMDGPUOperand::Ptr defaultBankMask() const; 1717 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1718 AMDGPUOperand::Ptr defaultFI() const; 1719 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1720 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1721 1722 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1723 AMDGPUOperand::ImmTy Type); 1724 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1725 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1726 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1727 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1728 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1729 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1730 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1731 uint64_t BasicInstType, 1732 bool SkipDstVcc = false, 1733 bool SkipSrcVcc = false); 1734 1735 AMDGPUOperand::Ptr defaultBLGP() const; 1736 AMDGPUOperand::Ptr 
defaultCBSZ() const; 1737 AMDGPUOperand::Ptr defaultABID() const; 1738 1739 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1740 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1741 }; 1742 1743 struct OptionalOperand { 1744 const char *Name; 1745 AMDGPUOperand::ImmTy Type; 1746 bool IsBit; 1747 bool (*ConvertResult)(int64_t&); 1748 }; 1749 1750 } // end anonymous namespace 1751 1752 // May be called with integer type with equivalent bitwidth. 1753 static const fltSemantics *getFltSemantics(unsigned Size) { 1754 switch (Size) { 1755 case 4: 1756 return &APFloat::IEEEsingle(); 1757 case 8: 1758 return &APFloat::IEEEdouble(); 1759 case 2: 1760 return &APFloat::IEEEhalf(); 1761 default: 1762 llvm_unreachable("unsupported fp type"); 1763 } 1764 } 1765 1766 static const fltSemantics *getFltSemantics(MVT VT) { 1767 return getFltSemantics(VT.getSizeInBits() / 8); 1768 } 1769 1770 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1771 switch (OperandType) { 1772 case AMDGPU::OPERAND_REG_IMM_INT32: 1773 case AMDGPU::OPERAND_REG_IMM_FP32: 1774 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1775 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1776 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1777 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1778 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1779 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1780 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1781 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1782 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1783 case AMDGPU::OPERAND_KIMM32: 1784 return &APFloat::IEEEsingle(); 1785 case AMDGPU::OPERAND_REG_IMM_INT64: 1786 case AMDGPU::OPERAND_REG_IMM_FP64: 1787 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1788 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1789 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1790 return &APFloat::IEEEdouble(); 1791 case AMDGPU::OPERAND_REG_IMM_INT16: 1792 case AMDGPU::OPERAND_REG_IMM_FP16: 1793 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1794 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1795 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1796 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1797 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1798 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1799 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1800 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1801 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1802 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1803 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1804 case AMDGPU::OPERAND_KIMM16: 1805 return &APFloat::IEEEhalf(); 1806 default: 1807 llvm_unreachable("unsupported fp type"); 1808 } 1809 } 1810 1811 //===----------------------------------------------------------------------===// 1812 // Operand 1813 //===----------------------------------------------------------------------===// 1814 1815 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1816 bool Lost; 1817 1818 // Convert literal to single precision 1819 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1820 APFloat::rmNearestTiesToEven, 1821 &Lost); 1822 // We allow precision lost but not overflow or underflow 1823 if (Status != APFloat::opOK && 1824 Lost && 1825 ((Status & APFloat::opOverflow) != 0 || 1826 (Status & APFloat::opUnderflow) != 0)) { 1827 return false; 1828 } 1829 1830 return true; 1831 } 1832 1833 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1834 return isUIntN(Size, Val) || isIntN(Size, Val); 1835 } 1836 1837 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1838 if (VT.getScalarType() == MVT::i16) { 1839 // FP immediate values 
are not reliably handled for i16 operands, so accept only integer inline constants here.
1840 return isInlinableIntLiteral(Val);
1841 }
1842
1843 // f16/v2f16 operands work correctly for all values.
1844 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1845 }
1846
1847 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1848
1849 // This is a hack to enable named inline values like
1850 // shared_base with both 32-bit and 64-bit operands.
1851 // Note that these values are defined as
1852 // 32-bit operands only.
1853 if (isInlineValue()) {
1854 return true;
1855 }
1856
1857 if (!isImmTy(ImmTyNone)) {
1858 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1859 return false;
1860 }
1861 // TODO: We should avoid using host float here. It would be better to
1862 // check the float bit values which is what a few other places do.
1863 // We've had bot failures before due to weird NaN support on mips hosts.
1864
1865 APInt Literal(64, Imm.Val);
1866
1867 if (Imm.IsFPImm) { // We got fp literal token
1868 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1869 return AMDGPU::isInlinableLiteral64(Imm.Val,
1870 AsmParser->hasInv2PiInlineImm());
1871 }
1872
1873 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1874 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1875 return false;
1876
1877 if (type.getScalarSizeInBits() == 16) {
1878 return isInlineableLiteralOp16(
1879 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1880 type, AsmParser->hasInv2PiInlineImm());
1881 }
1882
1883 // Check if single precision literal is inlinable
1884 return AMDGPU::isInlinableLiteral32(
1885 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1886 AsmParser->hasInv2PiInlineImm());
1887 }
1888
1889 // We got int literal token.
1890 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1891 return AMDGPU::isInlinableLiteral64(Imm.Val,
1892 AsmParser->hasInv2PiInlineImm());
1893 }
1894
1895 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1896 return false;
1897 }
1898
1899 if (type.getScalarSizeInBits() == 16) {
1900 return isInlineableLiteralOp16(
1901 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1902 type, AsmParser->hasInv2PiInlineImm());
1903 }
1904
1905 return AMDGPU::isInlinableLiteral32(
1906 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1907 AsmParser->hasInv2PiInlineImm());
1908 }
1909
1910 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1911 // Check that this immediate can be added as literal
1912 if (!isImmTy(ImmTyNone)) {
1913 return false;
1914 }
1915
1916 if (!Imm.IsFPImm) {
1917 // We got int literal token.
1918
1919 if (type == MVT::f64 && hasFPModifiers()) {
1920 // Cannot apply fp modifiers to int literals preserving the same semantics
1921 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
1922 // disable these cases.
1923 return false;
1924 }
1925
1926 unsigned Size = type.getSizeInBits();
1927 if (Size == 64)
1928 Size = 32;
1929
1930 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1931 // types.
1932 return isSafeTruncation(Imm.Val, Size);
1933 }
1934
1935 // We got fp literal token
1936 if (type == MVT::f64) { // Expected 64-bit fp operand
1937 // The low 32 bits of the literal would be set to zeroes, but such literals are accepted.
1938 return true;
1939 }
1940
1941 if (type == MVT::i64) { // Expected 64-bit int operand
1942 // We don't allow fp literals in 64-bit integer instructions. It is
1943 // unclear how we should encode them.
1944 return false; 1945 } 1946 1947 // We allow fp literals with f16x2 operands assuming that the specified 1948 // literal goes into the lower half and the upper half is zero. We also 1949 // require that the literal may be losslessly converted to f16. 1950 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1951 (type == MVT::v2i16)? MVT::i16 : 1952 (type == MVT::v2f32)? MVT::f32 : type; 1953 1954 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1955 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1956 } 1957 1958 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1959 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1960 } 1961 1962 bool AMDGPUOperand::isVRegWithInputMods() const { 1963 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1964 // GFX90A allows DPP on 64-bit operands. 1965 (isRegClass(AMDGPU::VReg_64RegClassID) && 1966 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1967 } 1968 1969 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1970 if (AsmParser->isVI()) 1971 return isVReg32(); 1972 else if (AsmParser->isGFX9Plus()) 1973 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1974 else 1975 return false; 1976 } 1977 1978 bool AMDGPUOperand::isSDWAFP16Operand() const { 1979 return isSDWAOperand(MVT::f16); 1980 } 1981 1982 bool AMDGPUOperand::isSDWAFP32Operand() const { 1983 return isSDWAOperand(MVT::f32); 1984 } 1985 1986 bool AMDGPUOperand::isSDWAInt16Operand() const { 1987 return isSDWAOperand(MVT::i16); 1988 } 1989 1990 bool AMDGPUOperand::isSDWAInt32Operand() const { 1991 return isSDWAOperand(MVT::i32); 1992 } 1993 1994 bool AMDGPUOperand::isBoolReg() const { 1995 auto FB = AsmParser->getFeatureBits(); 1996 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1997 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1998 } 1999 2000 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 2001 { 2002 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2003 assert(Size == 2 || Size == 4 || Size == 8); 2004 2005 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 2006 2007 if (Imm.Mods.Abs) { 2008 Val &= ~FpSignMask; 2009 } 2010 if (Imm.Mods.Neg) { 2011 Val ^= FpSignMask; 2012 } 2013 2014 return Val; 2015 } 2016 2017 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 2018 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 2019 Inst.getNumOperands())) { 2020 addLiteralImmOperand(Inst, Imm.Val, 2021 ApplyModifiers & 2022 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 2023 } else { 2024 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 2025 Inst.addOperand(MCOperand::createImm(Imm.Val)); 2026 setImmKindNone(); 2027 } 2028 } 2029 2030 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 2031 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 2032 auto OpNum = Inst.getNumOperands(); 2033 // Check that this operand accepts literals 2034 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 2035 2036 if (ApplyModifiers) { 2037 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2038 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 2039 Val = applyInputFPModifiers(Val, Size); 2040 } 2041 2042 APInt Literal(64, Val); 2043 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2044 2045 if (Imm.IsFPImm) { // We got fp literal token 2046 switch (OpTy) { 2047 case AMDGPU::OPERAND_REG_IMM_INT64: 2048 case AMDGPU::OPERAND_REG_IMM_FP64: 2049 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2050 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2051 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2052 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2053 AsmParser->hasInv2PiInlineImm())) { 2054 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2055 setImmKindConst(); 2056 return; 2057 } 2058 2059 // Non-inlineable 2060 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2061 // For fp operands we check if low 32 bits are zeros 2062 if (Literal.getLoBits(32) != 0) { 2063 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2064 "Can't encode literal as exact 64-bit floating-point operand. " 2065 "Low 32-bits will be set to zero"); 2066 } 2067 2068 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2069 setImmKindLiteral(); 2070 return; 2071 } 2072 2073 // We don't allow fp literals in 64-bit integer instructions. It is 2074 // unclear how we should encode them. This case should be checked earlier 2075 // in predicate methods (isLiteralImm()) 2076 llvm_unreachable("fp literal in 64-bit integer instruction."); 2077 2078 case AMDGPU::OPERAND_REG_IMM_INT32: 2079 case AMDGPU::OPERAND_REG_IMM_FP32: 2080 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2081 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2082 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2083 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2084 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2085 case AMDGPU::OPERAND_REG_IMM_INT16: 2086 case AMDGPU::OPERAND_REG_IMM_FP16: 2087 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2088 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2089 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2090 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2091 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2092 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2093 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2094 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2095 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2096 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2097 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2099 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2100 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2101 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2102 case AMDGPU::OPERAND_KIMM32: 2103 case AMDGPU::OPERAND_KIMM16: { 2104 bool lost; 2105 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2106 // Convert literal to single precision 2107 FPLiteral.convert(*getOpFltSemantics(OpTy), 2108 APFloat::rmNearestTiesToEven, &lost); 2109 // We allow precision lost but not overflow or underflow. This should be 2110 // checked earlier in isLiteralImm() 2111 2112 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2113 Inst.addOperand(MCOperand::createImm(ImmVal)); 2114 setImmKindLiteral(); 2115 return; 2116 } 2117 default: 2118 llvm_unreachable("invalid operand size"); 2119 } 2120 2121 return; 2122 } 2123 2124 // We got int literal token. 2125 // Only sign extend inline immediates. 
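// For instance (illustrative): with a 32-bit operand, -1 is an inline
// constant and is emitted unchanged below, while 0x12345678 is not inlinable
// and falls through to the literal path that keeps only the low 32 bits.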
2126 switch (OpTy) { 2127 case AMDGPU::OPERAND_REG_IMM_INT32: 2128 case AMDGPU::OPERAND_REG_IMM_FP32: 2129 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2130 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2131 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2132 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2133 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2134 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2135 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2136 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2137 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2138 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2139 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2140 if (isSafeTruncation(Val, 32) && 2141 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2142 AsmParser->hasInv2PiInlineImm())) { 2143 Inst.addOperand(MCOperand::createImm(Val)); 2144 setImmKindConst(); 2145 return; 2146 } 2147 2148 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2149 setImmKindLiteral(); 2150 return; 2151 2152 case AMDGPU::OPERAND_REG_IMM_INT64: 2153 case AMDGPU::OPERAND_REG_IMM_FP64: 2154 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2155 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2156 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2157 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2158 Inst.addOperand(MCOperand::createImm(Val)); 2159 setImmKindConst(); 2160 return; 2161 } 2162 2163 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2164 setImmKindLiteral(); 2165 return; 2166 2167 case AMDGPU::OPERAND_REG_IMM_INT16: 2168 case AMDGPU::OPERAND_REG_IMM_FP16: 2169 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2170 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2171 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2172 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2173 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2174 if (isSafeTruncation(Val, 16) && 2175 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2176 AsmParser->hasInv2PiInlineImm())) { 2177 Inst.addOperand(MCOperand::createImm(Val)); 2178 setImmKindConst(); 2179 return; 2180 } 2181 2182 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2183 setImmKindLiteral(); 2184 return; 2185 2186 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2187 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2188 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2189 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2190 assert(isSafeTruncation(Val, 16)); 2191 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2192 AsmParser->hasInv2PiInlineImm())); 2193 2194 Inst.addOperand(MCOperand::createImm(Val)); 2195 return; 2196 } 2197 case AMDGPU::OPERAND_KIMM32: 2198 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2199 setImmKindNone(); 2200 return; 2201 case AMDGPU::OPERAND_KIMM16: 2202 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2203 setImmKindNone(); 2204 return; 2205 default: 2206 llvm_unreachable("invalid operand size"); 2207 } 2208 } 2209 2210 template <unsigned Bitwidth> 2211 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2212 APInt Literal(64, Imm.Val); 2213 setImmKindNone(); 2214 2215 if (!Imm.IsFPImm) { 2216 // We got int literal token. 
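// An integer token is truncated to the k-imm field width, e.g. only the
// low 16 bits are kept for a 16-bit k-imm operand.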
2217 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2218 return; 2219 } 2220 2221 bool Lost; 2222 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2223 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2224 APFloat::rmNearestTiesToEven, &Lost); 2225 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2226 } 2227 2228 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2229 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2230 } 2231 2232 static bool isInlineValue(unsigned Reg) { 2233 switch (Reg) { 2234 case AMDGPU::SRC_SHARED_BASE: 2235 case AMDGPU::SRC_SHARED_LIMIT: 2236 case AMDGPU::SRC_PRIVATE_BASE: 2237 case AMDGPU::SRC_PRIVATE_LIMIT: 2238 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2239 return true; 2240 case AMDGPU::SRC_VCCZ: 2241 case AMDGPU::SRC_EXECZ: 2242 case AMDGPU::SRC_SCC: 2243 return true; 2244 case AMDGPU::SGPR_NULL: 2245 return true; 2246 default: 2247 return false; 2248 } 2249 } 2250 2251 bool AMDGPUOperand::isInlineValue() const { 2252 return isRegKind() && ::isInlineValue(getReg()); 2253 } 2254 2255 //===----------------------------------------------------------------------===// 2256 // AsmParser 2257 //===----------------------------------------------------------------------===// 2258 2259 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2260 if (Is == IS_VGPR) { 2261 switch (RegWidth) { 2262 default: return -1; 2263 case 32: 2264 return AMDGPU::VGPR_32RegClassID; 2265 case 64: 2266 return AMDGPU::VReg_64RegClassID; 2267 case 96: 2268 return AMDGPU::VReg_96RegClassID; 2269 case 128: 2270 return AMDGPU::VReg_128RegClassID; 2271 case 160: 2272 return AMDGPU::VReg_160RegClassID; 2273 case 192: 2274 return AMDGPU::VReg_192RegClassID; 2275 case 224: 2276 return AMDGPU::VReg_224RegClassID; 2277 case 256: 2278 return AMDGPU::VReg_256RegClassID; 2279 case 512: 2280 return AMDGPU::VReg_512RegClassID; 2281 case 1024: 2282 return AMDGPU::VReg_1024RegClassID; 2283 } 2284 } else if (Is == IS_TTMP) { 2285 switch (RegWidth) { 2286 default: return -1; 2287 case 32: 2288 return AMDGPU::TTMP_32RegClassID; 2289 case 64: 2290 return AMDGPU::TTMP_64RegClassID; 2291 case 128: 2292 return AMDGPU::TTMP_128RegClassID; 2293 case 256: 2294 return AMDGPU::TTMP_256RegClassID; 2295 case 512: 2296 return AMDGPU::TTMP_512RegClassID; 2297 } 2298 } else if (Is == IS_SGPR) { 2299 switch (RegWidth) { 2300 default: return -1; 2301 case 32: 2302 return AMDGPU::SGPR_32RegClassID; 2303 case 64: 2304 return AMDGPU::SGPR_64RegClassID; 2305 case 96: 2306 return AMDGPU::SGPR_96RegClassID; 2307 case 128: 2308 return AMDGPU::SGPR_128RegClassID; 2309 case 160: 2310 return AMDGPU::SGPR_160RegClassID; 2311 case 192: 2312 return AMDGPU::SGPR_192RegClassID; 2313 case 224: 2314 return AMDGPU::SGPR_224RegClassID; 2315 case 256: 2316 return AMDGPU::SGPR_256RegClassID; 2317 case 512: 2318 return AMDGPU::SGPR_512RegClassID; 2319 } 2320 } else if (Is == IS_AGPR) { 2321 switch (RegWidth) { 2322 default: return -1; 2323 case 32: 2324 return AMDGPU::AGPR_32RegClassID; 2325 case 64: 2326 return AMDGPU::AReg_64RegClassID; 2327 case 96: 2328 return AMDGPU::AReg_96RegClassID; 2329 case 128: 2330 return AMDGPU::AReg_128RegClassID; 2331 case 160: 2332 return AMDGPU::AReg_160RegClassID; 2333 case 192: 2334 return AMDGPU::AReg_192RegClassID; 2335 case 224: 2336 return AMDGPU::AReg_224RegClassID; 2337 case 256: 2338 return AMDGPU::AReg_256RegClassID; 2339 case 512: 2340 return AMDGPU::AReg_512RegClassID; 
2341 case 1024: 2342 return AMDGPU::AReg_1024RegClassID; 2343 } 2344 } 2345 return -1; 2346 } 2347 2348 static unsigned getSpecialRegForName(StringRef RegName) { 2349 return StringSwitch<unsigned>(RegName) 2350 .Case("exec", AMDGPU::EXEC) 2351 .Case("vcc", AMDGPU::VCC) 2352 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2353 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2354 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2355 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2356 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2357 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2358 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2359 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2360 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2361 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2362 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2363 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2364 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2365 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2366 .Case("m0", AMDGPU::M0) 2367 .Case("vccz", AMDGPU::SRC_VCCZ) 2368 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2369 .Case("execz", AMDGPU::SRC_EXECZ) 2370 .Case("src_execz", AMDGPU::SRC_EXECZ) 2371 .Case("scc", AMDGPU::SRC_SCC) 2372 .Case("src_scc", AMDGPU::SRC_SCC) 2373 .Case("tba", AMDGPU::TBA) 2374 .Case("tma", AMDGPU::TMA) 2375 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2376 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2377 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2378 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2379 .Case("vcc_lo", AMDGPU::VCC_LO) 2380 .Case("vcc_hi", AMDGPU::VCC_HI) 2381 .Case("exec_lo", AMDGPU::EXEC_LO) 2382 .Case("exec_hi", AMDGPU::EXEC_HI) 2383 .Case("tma_lo", AMDGPU::TMA_LO) 2384 .Case("tma_hi", AMDGPU::TMA_HI) 2385 .Case("tba_lo", AMDGPU::TBA_LO) 2386 .Case("tba_hi", AMDGPU::TBA_HI) 2387 .Case("pc", AMDGPU::PC_REG) 2388 .Case("null", AMDGPU::SGPR_NULL) 2389 .Default(AMDGPU::NoRegister); 2390 } 2391 2392 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2393 SMLoc &EndLoc, bool RestoreOnFailure) { 2394 auto R = parseRegister(); 2395 if (!R) return true; 2396 assert(R->isReg()); 2397 RegNo = R->getReg(); 2398 StartLoc = R->getStartLoc(); 2399 EndLoc = R->getEndLoc(); 2400 return false; 2401 } 2402 2403 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2404 SMLoc &EndLoc) { 2405 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2406 } 2407 2408 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2409 SMLoc &StartLoc, 2410 SMLoc &EndLoc) { 2411 bool Result = 2412 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2413 bool PendingErrors = getParser().hasPendingError(); 2414 getParser().clearPendingErrors(); 2415 if (PendingErrors) 2416 return MatchOperand_ParseFail; 2417 if (Result) 2418 return MatchOperand_NoMatch; 2419 return MatchOperand_Success; 2420 } 2421 2422 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2423 RegisterKind RegKind, unsigned Reg1, 2424 SMLoc Loc) { 2425 switch (RegKind) { 2426 case IS_SPECIAL: 2427 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2428 Reg = AMDGPU::EXEC; 2429 RegWidth = 64; 2430 return true; 2431 } 2432 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2433 Reg = AMDGPU::FLAT_SCR; 2434 RegWidth = 64; 2435 return true; 2436 } 2437 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2438 Reg = AMDGPU::XNACK_MASK; 2439 RegWidth = 64; 
2440 return true; 2441 } 2442 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2443 Reg = AMDGPU::VCC; 2444 RegWidth = 64; 2445 return true; 2446 } 2447 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2448 Reg = AMDGPU::TBA; 2449 RegWidth = 64; 2450 return true; 2451 } 2452 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2453 Reg = AMDGPU::TMA; 2454 RegWidth = 64; 2455 return true; 2456 } 2457 Error(Loc, "register does not fit in the list"); 2458 return false; 2459 case IS_VGPR: 2460 case IS_SGPR: 2461 case IS_AGPR: 2462 case IS_TTMP: 2463 if (Reg1 != Reg + RegWidth / 32) { 2464 Error(Loc, "registers in a list must have consecutive indices"); 2465 return false; 2466 } 2467 RegWidth += 32; 2468 return true; 2469 default: 2470 llvm_unreachable("unexpected register kind"); 2471 } 2472 } 2473 2474 struct RegInfo { 2475 StringLiteral Name; 2476 RegisterKind Kind; 2477 }; 2478 2479 static constexpr RegInfo RegularRegisters[] = { 2480 {{"v"}, IS_VGPR}, 2481 {{"s"}, IS_SGPR}, 2482 {{"ttmp"}, IS_TTMP}, 2483 {{"acc"}, IS_AGPR}, 2484 {{"a"}, IS_AGPR}, 2485 }; 2486 2487 static bool isRegularReg(RegisterKind Kind) { 2488 return Kind == IS_VGPR || 2489 Kind == IS_SGPR || 2490 Kind == IS_TTMP || 2491 Kind == IS_AGPR; 2492 } 2493 2494 static const RegInfo* getRegularRegInfo(StringRef Str) { 2495 for (const RegInfo &Reg : RegularRegisters) 2496 if (Str.startswith(Reg.Name)) 2497 return &Reg; 2498 return nullptr; 2499 } 2500 2501 static bool getRegNum(StringRef Str, unsigned& Num) { 2502 return !Str.getAsInteger(10, Num); 2503 } 2504 2505 bool 2506 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2507 const AsmToken &NextToken) const { 2508 2509 // A list of consecutive registers: [s0,s1,s2,s3] 2510 if (Token.is(AsmToken::LBrac)) 2511 return true; 2512 2513 if (!Token.is(AsmToken::Identifier)) 2514 return false; 2515 2516 // A single register like s0 or a range of registers like s[0:1] 2517 2518 StringRef Str = Token.getString(); 2519 const RegInfo *Reg = getRegularRegInfo(Str); 2520 if (Reg) { 2521 StringRef RegName = Reg->Name; 2522 StringRef RegSuffix = Str.substr(RegName.size()); 2523 if (!RegSuffix.empty()) { 2524 unsigned Num; 2525 // A single register with an index: rXX 2526 if (getRegNum(RegSuffix, Num)) 2527 return true; 2528 } else { 2529 // A range of registers: r[XX:YY]. 2530 if (NextToken.is(AsmToken::LBrac)) 2531 return true; 2532 } 2533 } 2534 2535 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2536 } 2537 2538 bool 2539 AMDGPUAsmParser::isRegister() 2540 { 2541 return isRegister(getToken(), peekToken()); 2542 } 2543 2544 unsigned 2545 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2546 unsigned RegNum, 2547 unsigned RegWidth, 2548 SMLoc Loc) { 2549 2550 assert(isRegularReg(RegKind)); 2551 2552 unsigned AlignSize = 1; 2553 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2554 // SGPR and TTMP registers must be aligned. 2555 // Max required alignment is 4 dwords. 
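// For example (illustrative): a 64-bit SGPR pair must start at an even
// register index, so s[2:3] is accepted while s[1:2] is rejected below as
// misaligned.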
2556 AlignSize = std::min(RegWidth / 32, 4u); 2557 } 2558 2559 if (RegNum % AlignSize != 0) { 2560 Error(Loc, "invalid register alignment"); 2561 return AMDGPU::NoRegister; 2562 } 2563 2564 unsigned RegIdx = RegNum / AlignSize; 2565 int RCID = getRegClass(RegKind, RegWidth); 2566 if (RCID == -1) { 2567 Error(Loc, "invalid or unsupported register size"); 2568 return AMDGPU::NoRegister; 2569 } 2570 2571 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2572 const MCRegisterClass RC = TRI->getRegClass(RCID); 2573 if (RegIdx >= RC.getNumRegs()) { 2574 Error(Loc, "register index is out of range"); 2575 return AMDGPU::NoRegister; 2576 } 2577 2578 return RC.getRegister(RegIdx); 2579 } 2580 2581 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2582 int64_t RegLo, RegHi; 2583 if (!skipToken(AsmToken::LBrac, "missing register index")) 2584 return false; 2585 2586 SMLoc FirstIdxLoc = getLoc(); 2587 SMLoc SecondIdxLoc; 2588 2589 if (!parseExpr(RegLo)) 2590 return false; 2591 2592 if (trySkipToken(AsmToken::Colon)) { 2593 SecondIdxLoc = getLoc(); 2594 if (!parseExpr(RegHi)) 2595 return false; 2596 } else { 2597 RegHi = RegLo; 2598 } 2599 2600 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2601 return false; 2602 2603 if (!isUInt<32>(RegLo)) { 2604 Error(FirstIdxLoc, "invalid register index"); 2605 return false; 2606 } 2607 2608 if (!isUInt<32>(RegHi)) { 2609 Error(SecondIdxLoc, "invalid register index"); 2610 return false; 2611 } 2612 2613 if (RegLo > RegHi) { 2614 Error(FirstIdxLoc, "first register index should not exceed second index"); 2615 return false; 2616 } 2617 2618 Num = static_cast<unsigned>(RegLo); 2619 RegWidth = 32 * ((RegHi - RegLo) + 1); 2620 return true; 2621 } 2622 2623 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2624 unsigned &RegNum, unsigned &RegWidth, 2625 SmallVectorImpl<AsmToken> &Tokens) { 2626 assert(isToken(AsmToken::Identifier)); 2627 unsigned Reg = getSpecialRegForName(getTokenStr()); 2628 if (Reg) { 2629 RegNum = 0; 2630 RegWidth = 32; 2631 RegKind = IS_SPECIAL; 2632 Tokens.push_back(getToken()); 2633 lex(); // skip register name 2634 } 2635 return Reg; 2636 } 2637 2638 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2639 unsigned &RegNum, unsigned &RegWidth, 2640 SmallVectorImpl<AsmToken> &Tokens) { 2641 assert(isToken(AsmToken::Identifier)); 2642 StringRef RegName = getTokenStr(); 2643 auto Loc = getLoc(); 2644 2645 const RegInfo *RI = getRegularRegInfo(RegName); 2646 if (!RI) { 2647 Error(Loc, "invalid register name"); 2648 return AMDGPU::NoRegister; 2649 } 2650 2651 Tokens.push_back(getToken()); 2652 lex(); // skip register name 2653 2654 RegKind = RI->Kind; 2655 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2656 if (!RegSuffix.empty()) { 2657 // Single 32-bit register: vXX. 2658 if (!getRegNum(RegSuffix, RegNum)) { 2659 Error(Loc, "invalid register index"); 2660 return AMDGPU::NoRegister; 2661 } 2662 RegWidth = 32; 2663 } else { 2664 // Range of registers: v[XX:YY]. ":YY" is optional. 
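// E.g. (illustrative) v[0:3] names four consecutive VGPRs starting at v0,
// while v[5] uses the range syntax for a single 32-bit register.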
2665 if (!ParseRegRange(RegNum, RegWidth)) 2666 return AMDGPU::NoRegister; 2667 } 2668 2669 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2670 } 2671 2672 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2673 unsigned &RegWidth, 2674 SmallVectorImpl<AsmToken> &Tokens) { 2675 unsigned Reg = AMDGPU::NoRegister; 2676 auto ListLoc = getLoc(); 2677 2678 if (!skipToken(AsmToken::LBrac, 2679 "expected a register or a list of registers")) { 2680 return AMDGPU::NoRegister; 2681 } 2682 2683 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2684 2685 auto Loc = getLoc(); 2686 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2687 return AMDGPU::NoRegister; 2688 if (RegWidth != 32) { 2689 Error(Loc, "expected a single 32-bit register"); 2690 return AMDGPU::NoRegister; 2691 } 2692 2693 for (; trySkipToken(AsmToken::Comma); ) { 2694 RegisterKind NextRegKind; 2695 unsigned NextReg, NextRegNum, NextRegWidth; 2696 Loc = getLoc(); 2697 2698 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2699 NextRegNum, NextRegWidth, 2700 Tokens)) { 2701 return AMDGPU::NoRegister; 2702 } 2703 if (NextRegWidth != 32) { 2704 Error(Loc, "expected a single 32-bit register"); 2705 return AMDGPU::NoRegister; 2706 } 2707 if (NextRegKind != RegKind) { 2708 Error(Loc, "registers in a list must be of the same kind"); 2709 return AMDGPU::NoRegister; 2710 } 2711 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2712 return AMDGPU::NoRegister; 2713 } 2714 2715 if (!skipToken(AsmToken::RBrac, 2716 "expected a comma or a closing square bracket")) { 2717 return AMDGPU::NoRegister; 2718 } 2719 2720 if (isRegularReg(RegKind)) 2721 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2722 2723 return Reg; 2724 } 2725 2726 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2727 unsigned &RegNum, unsigned &RegWidth, 2728 SmallVectorImpl<AsmToken> &Tokens) { 2729 auto Loc = getLoc(); 2730 Reg = AMDGPU::NoRegister; 2731 2732 if (isToken(AsmToken::Identifier)) { 2733 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2734 if (Reg == AMDGPU::NoRegister) 2735 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2736 } else { 2737 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2738 } 2739 2740 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2741 if (Reg == AMDGPU::NoRegister) { 2742 assert(Parser.hasPendingError()); 2743 return false; 2744 } 2745 2746 if (!subtargetHasRegister(*TRI, Reg)) { 2747 if (Reg == AMDGPU::SGPR_NULL) { 2748 Error(Loc, "'null' operand is not supported on this GPU"); 2749 } else { 2750 Error(Loc, "register not available on this GPU"); 2751 } 2752 return false; 2753 } 2754 2755 return true; 2756 } 2757 2758 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2759 unsigned &RegNum, unsigned &RegWidth, 2760 bool RestoreOnFailure /*=false*/) { 2761 Reg = AMDGPU::NoRegister; 2762 2763 SmallVector<AsmToken, 1> Tokens; 2764 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2765 if (RestoreOnFailure) { 2766 while (!Tokens.empty()) { 2767 getLexer().UnLex(Tokens.pop_back_val()); 2768 } 2769 } 2770 return true; 2771 } 2772 return false; 2773 } 2774 2775 Optional<StringRef> 2776 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2777 switch (RegKind) { 2778 case IS_VGPR: 2779 return StringRef(".amdgcn.next_free_vgpr"); 2780 case IS_SGPR: 2781 return StringRef(".amdgcn.next_free_sgpr"); 2782 default: 2783 return None; 2784 } 2785 } 2786 2787 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2788 auto SymbolName = getGprCountSymbolName(RegKind); 2789 assert(SymbolName && "initializing invalid register kind"); 2790 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2791 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2792 } 2793 2794 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2795 unsigned DwordRegIndex, 2796 unsigned RegWidth) { 2797 // Symbols are only defined for GCN targets 2798 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2799 return true; 2800 2801 auto SymbolName = getGprCountSymbolName(RegKind); 2802 if (!SymbolName) 2803 return true; 2804 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2805 2806 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2807 int64_t OldCount; 2808 2809 if (!Sym->isVariable()) 2810 return !Error(getLoc(), 2811 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2812 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2813 return !Error( 2814 getLoc(), 2815 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2816 2817 if (OldCount <= NewMax) 2818 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2819 2820 return true; 2821 } 2822 2823 std::unique_ptr<AMDGPUOperand> 2824 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2825 const auto &Tok = getToken(); 2826 SMLoc StartLoc = Tok.getLoc(); 2827 SMLoc EndLoc = Tok.getEndLoc(); 2828 RegisterKind RegKind; 2829 unsigned Reg, RegNum, RegWidth; 2830 2831 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2832 return nullptr; 2833 } 2834 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2835 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2836 return nullptr; 2837 } else 2838 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2839 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2840 } 2841 2842 OperandMatchResultTy 2843 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2844 // TODO: add syntactic sugar for 1/(2*PI) 2845 2846 assert(!isRegister()); 2847 assert(!isModifier()); 2848 2849 const auto& Tok = getToken(); 2850 const auto& NextTok = peekToken(); 2851 bool IsReal = Tok.is(AsmToken::Real); 2852 SMLoc S = getLoc(); 2853 bool Negate = false; 2854 2855 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2856 lex(); 2857 IsReal = true; 2858 Negate = true; 2859 } 2860 2861 if (IsReal) { 2862 // Floating-point expressions are not supported. 2863 // Can only allow floating-point literals with an 2864 // optional sign. 2865 2866 StringRef Num = getTokenStr(); 2867 lex(); 2868 2869 APFloat RealVal(APFloat::IEEEdouble()); 2870 auto roundMode = APFloat::rmNearestTiesToEven; 2871 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2872 return MatchOperand_ParseFail; 2873 } 2874 if (Negate) 2875 RealVal.changeSign(); 2876 2877 Operands.push_back( 2878 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2879 AMDGPUOperand::ImmTyNone, true)); 2880 2881 return MatchOperand_Success; 2882 2883 } else { 2884 int64_t IntVal; 2885 const MCExpr *Expr; 2886 SMLoc S = getLoc(); 2887 2888 if (HasSP3AbsModifier) { 2889 // This is a workaround for handling expressions 2890 // as arguments of SP3 'abs' modifier, for example: 2891 // |1.0| 2892 // |-1| 2893 // |1+x| 2894 // This syntax is not compatible with syntax of standard 2895 // MC expressions (due to the trailing '|'). 
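// Hence a primary expression is parsed here instead of a full expression,
// so that the trailing '|' is not consumed as a binary OR operator.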
2896 SMLoc EndLoc; 2897 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2898 return MatchOperand_ParseFail; 2899 } else { 2900 if (Parser.parseExpression(Expr)) 2901 return MatchOperand_ParseFail; 2902 } 2903 2904 if (Expr->evaluateAsAbsolute(IntVal)) { 2905 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2906 } else { 2907 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2908 } 2909 2910 return MatchOperand_Success; 2911 } 2912 2913 return MatchOperand_NoMatch; 2914 } 2915 2916 OperandMatchResultTy 2917 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2918 if (!isRegister()) 2919 return MatchOperand_NoMatch; 2920 2921 if (auto R = parseRegister()) { 2922 assert(R->isReg()); 2923 Operands.push_back(std::move(R)); 2924 return MatchOperand_Success; 2925 } 2926 return MatchOperand_ParseFail; 2927 } 2928 2929 OperandMatchResultTy 2930 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2931 auto res = parseReg(Operands); 2932 if (res != MatchOperand_NoMatch) { 2933 return res; 2934 } else if (isModifier()) { 2935 return MatchOperand_NoMatch; 2936 } else { 2937 return parseImm(Operands, HasSP3AbsMod); 2938 } 2939 } 2940 2941 bool 2942 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2943 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2944 const auto &str = Token.getString(); 2945 return str == "abs" || str == "neg" || str == "sext"; 2946 } 2947 return false; 2948 } 2949 2950 bool 2951 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2952 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2953 } 2954 2955 bool 2956 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2957 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2958 } 2959 2960 bool 2961 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2962 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2963 } 2964 2965 // Check if this is an operand modifier or an opcode modifier 2966 // which may look like an expression but it is not. We should 2967 // avoid parsing these modifiers as expressions. Currently 2968 // recognized sequences are: 2969 // |...| 2970 // abs(...) 2971 // neg(...) 2972 // sext(...) 2973 // -reg 2974 // -|...| 2975 // -abs(...) 2976 // name:... 2977 // Note that simple opcode modifiers like 'gds' may be parsed as 2978 // expressions; this is a special case. See getExpressionAsToken. 2979 // 2980 bool 2981 AMDGPUAsmParser::isModifier() { 2982 2983 AsmToken Tok = getToken(); 2984 AsmToken NextToken[2]; 2985 peekTokens(NextToken); 2986 2987 return isOperandModifier(Tok, NextToken[0]) || 2988 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2989 isOpcodeModifierWithVal(Tok, NextToken[0]); 2990 } 2991 2992 // Check if the current token is an SP3 'neg' modifier. 2993 // Currently this modifier is allowed in the following context: 2994 // 2995 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2996 // 2. Before an 'abs' modifier: -abs(...) 2997 // 3. Before an SP3 'abs' modifier: -|...| 2998 // 2999 // In all other cases "-" is handled as a part 3000 // of an expression that follows the sign. 
3001 //
3002 // Note: When "-" is followed by an integer literal,
3003 // this is interpreted as integer negation rather
3004 // than a floating-point NEG modifier applied to the literal.
3005 // Besides being counter-intuitive, such use of the floating-point
3006 // NEG modifier would have resulted in different meanings
3007 // of integer literals used with VOP1/2/C and VOP3,
3008 // for example:
3009 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
3010 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
3011 // Negative fp literals with a preceding "-" are
3012 // handled likewise, for uniformity.
3013 //
3014 bool
3015 AMDGPUAsmParser::parseSP3NegModifier() {
3016
3017 AsmToken NextToken[2];
3018 peekTokens(NextToken);
3019
3020 if (isToken(AsmToken::Minus) &&
3021 (isRegister(NextToken[0], NextToken[1]) ||
3022 NextToken[0].is(AsmToken::Pipe) ||
3023 isId(NextToken[0], "abs"))) {
3024 lex();
3025 return true;
3026 }
3027
3028 return false;
3029 }
3030
3031 OperandMatchResultTy
3032 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
3033 bool AllowImm) {
3034 bool Neg, SP3Neg;
3035 bool Abs, SP3Abs;
3036 SMLoc Loc;
3037
3038 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
3039 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
3040 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
3041 return MatchOperand_ParseFail;
3042 }
3043
3044 SP3Neg = parseSP3NegModifier();
3045
3046 Loc = getLoc();
3047 Neg = trySkipId("neg");
3048 if (Neg && SP3Neg) {
3049 Error(Loc, "expected register or immediate");
3050 return MatchOperand_ParseFail;
3051 }
3052 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
3053 return MatchOperand_ParseFail;
3054
3055 Abs = trySkipId("abs");
3056 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
3057 return MatchOperand_ParseFail;
3058
3059 Loc = getLoc();
3060 SP3Abs = trySkipToken(AsmToken::Pipe);
3061 if (Abs && SP3Abs) {
3062 Error(Loc, "expected register or immediate");
3063 return MatchOperand_ParseFail;
3064 }
3065
3066 OperandMatchResultTy Res;
3067 if (AllowImm) {
3068 Res = parseRegOrImm(Operands, SP3Abs);
3069 } else {
3070 Res = parseReg(Operands);
3071 }
3072 if (Res != MatchOperand_Success) {
3073 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3074 } 3075 3076 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3077 return MatchOperand_ParseFail; 3078 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3079 return MatchOperand_ParseFail; 3080 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3081 return MatchOperand_ParseFail; 3082 3083 AMDGPUOperand::Modifiers Mods; 3084 Mods.Abs = Abs || SP3Abs; 3085 Mods.Neg = Neg || SP3Neg; 3086 3087 if (Mods.hasFPModifiers()) { 3088 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3089 if (Op.isExpr()) { 3090 Error(Op.getStartLoc(), "expected an absolute expression"); 3091 return MatchOperand_ParseFail; 3092 } 3093 Op.setModifiers(Mods); 3094 } 3095 return MatchOperand_Success; 3096 } 3097 3098 OperandMatchResultTy 3099 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3100 bool AllowImm) { 3101 bool Sext = trySkipId("sext"); 3102 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3103 return MatchOperand_ParseFail; 3104 3105 OperandMatchResultTy Res; 3106 if (AllowImm) { 3107 Res = parseRegOrImm(Operands); 3108 } else { 3109 Res = parseReg(Operands); 3110 } 3111 if (Res != MatchOperand_Success) { 3112 return Sext? MatchOperand_ParseFail : Res; 3113 } 3114 3115 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3116 return MatchOperand_ParseFail; 3117 3118 AMDGPUOperand::Modifiers Mods; 3119 Mods.Sext = Sext; 3120 3121 if (Mods.hasIntModifiers()) { 3122 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3123 if (Op.isExpr()) { 3124 Error(Op.getStartLoc(), "expected an absolute expression"); 3125 return MatchOperand_ParseFail; 3126 } 3127 Op.setModifiers(Mods); 3128 } 3129 3130 return MatchOperand_Success; 3131 } 3132 3133 OperandMatchResultTy 3134 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3135 return parseRegOrImmWithFPInputMods(Operands, false); 3136 } 3137 3138 OperandMatchResultTy 3139 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3140 return parseRegOrImmWithIntInputMods(Operands, false); 3141 } 3142 3143 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3144 auto Loc = getLoc(); 3145 if (trySkipId("off")) { 3146 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3147 AMDGPUOperand::ImmTyOff, false)); 3148 return MatchOperand_Success; 3149 } 3150 3151 if (!isRegister()) 3152 return MatchOperand_NoMatch; 3153 3154 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3155 if (Reg) { 3156 Operands.push_back(std::move(Reg)); 3157 return MatchOperand_Success; 3158 } 3159 3160 return MatchOperand_ParseFail; 3161 3162 } 3163 3164 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3165 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3166 3167 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3168 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3169 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3170 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3171 return Match_InvalidOperand; 3172 3173 if ((TSFlags & SIInstrFlags::VOP3) && 3174 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3175 getForcedEncodingSize() != 64) 3176 return Match_PreferE32; 3177 3178 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3179 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3180 // v_mac_f32/16 allow only dst_sel == DWORD; 3181 auto OpNum = 3182 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
3183 const auto &Op = Inst.getOperand(OpNum);
3184 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
3185 return Match_InvalidOperand;
3186 }
3187 }
3188
3189 return Match_Success;
3190 }
3191
3192 static ArrayRef<unsigned> getAllVariants() {
3193 static const unsigned Variants[] = {
3194 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
3195 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
3196 };
3197
3198 return makeArrayRef(Variants);
3199 }
3200
3201 // Which asm variants we should check
3202 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
3203 if (getForcedEncodingSize() == 32) {
3204 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
3205 return makeArrayRef(Variants);
3206 }
3207
3208 if (isForcedVOP3()) {
3209 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
3210 return makeArrayRef(Variants);
3211 }
3212
3213 if (isForcedSDWA()) {
3214 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
3215 AMDGPUAsmVariants::SDWA9};
3216 return makeArrayRef(Variants);
3217 }
3218
3219 if (isForcedDPP()) {
3220 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
3221 return makeArrayRef(Variants);
3222 }
3223
3224 return getAllVariants();
3225 }
3226
3227 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
3228 if (getForcedEncodingSize() == 32)
3229 return "e32";
3230
3231 if (isForcedVOP3())
3232 return "e64";
3233
3234 if (isForcedSDWA())
3235 return "sdwa";
3236
3237 if (isForcedDPP())
3238 return "dpp";
3239
3240 return "";
3241 }
3242
3243 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
3244 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
3245 const unsigned Num = Desc.getNumImplicitUses();
3246 for (unsigned i = 0; i < Num; ++i) {
3247 unsigned Reg = Desc.ImplicitUses[i];
3248 switch (Reg) {
3249 case AMDGPU::FLAT_SCR:
3250 case AMDGPU::VCC:
3251 case AMDGPU::VCC_LO:
3252 case AMDGPU::VCC_HI:
3253 case AMDGPU::M0:
3254 return Reg;
3255 default:
3256 break;
3257 }
3258 }
3259 return AMDGPU::NoRegister;
3260 }
3261
3262 // NB: This code is correct only when used to check constant
3263 // bus limitations because GFX7 supports no f16 inline constants.
3264 // Note that there are no cases when a GFX7 opcode violates
3265 // constant bus limitations due to the use of an f16 constant.
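// For example, with a 32-bit operand the integer 64 is an inline constant
// while 65 is not and must be encoded as a 32-bit literal.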
3266 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3267 unsigned OpIdx) const { 3268 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3269 3270 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3271 return false; 3272 } 3273 3274 const MCOperand &MO = Inst.getOperand(OpIdx); 3275 3276 int64_t Val = MO.getImm(); 3277 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3278 3279 switch (OpSize) { // expected operand size 3280 case 8: 3281 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3282 case 4: 3283 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3284 case 2: { 3285 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3286 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3287 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3288 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3289 return AMDGPU::isInlinableIntLiteral(Val); 3290 3291 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3292 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3293 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3294 return AMDGPU::isInlinableIntLiteralV216(Val); 3295 3296 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3297 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3298 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3299 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3300 3301 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3302 } 3303 default: 3304 llvm_unreachable("invalid operand size"); 3305 } 3306 } 3307 3308 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3309 if (!isGFX10Plus()) 3310 return 1; 3311 3312 switch (Opcode) { 3313 // 64-bit shift instructions can use only one scalar value input 3314 case AMDGPU::V_LSHLREV_B64_e64: 3315 case AMDGPU::V_LSHLREV_B64_gfx10: 3316 case AMDGPU::V_LSHRREV_B64_e64: 3317 case AMDGPU::V_LSHRREV_B64_gfx10: 3318 case AMDGPU::V_ASHRREV_I64_e64: 3319 case AMDGPU::V_ASHRREV_I64_gfx10: 3320 case AMDGPU::V_LSHL_B64_e64: 3321 case AMDGPU::V_LSHR_B64_e64: 3322 case AMDGPU::V_ASHR_I64_e64: 3323 return 1; 3324 default: 3325 return 2; 3326 } 3327 } 3328 3329 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3330 const MCOperand &MO = Inst.getOperand(OpIdx); 3331 if (MO.isImm()) { 3332 return !isInlineConstant(Inst, OpIdx); 3333 } else if (MO.isReg()) { 3334 auto Reg = MO.getReg(); 3335 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3336 auto PReg = mc2PseudoReg(Reg); 3337 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3338 } else { 3339 return true; 3340 } 3341 } 3342 3343 bool 3344 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3345 const OperandVector &Operands) { 3346 const unsigned Opcode = Inst.getOpcode(); 3347 const MCInstrDesc &Desc = MII.get(Opcode); 3348 unsigned LastSGPR = AMDGPU::NoRegister; 3349 unsigned ConstantBusUseCount = 0; 3350 unsigned NumLiterals = 0; 3351 unsigned LiteralSize; 3352 3353 if (Desc.TSFlags & 3354 (SIInstrFlags::VOPC | 3355 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3356 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3357 SIInstrFlags::SDWA)) { 3358 // Check special imm operands (used by madmk, etc) 3359 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3360 ++NumLiterals; 3361 LiteralSize = 4; 3362 } 3363 3364 SmallDenseSet<unsigned> SGPRsUsed; 3365 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3366 if (SGPRUsed != AMDGPU::NoRegister) { 3367 SGPRsUsed.insert(SGPRUsed); 3368 ++ConstantBusUseCount; 3369 } 3370 3371 
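// Illustrative example of a violation reported below: on targets with a
// single constant bus slot, v_add_f32_e64 v0, s0, s1 reads two different
// SGPRs and would be rejected.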
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3372 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3373 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3374
3375 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3376
3377 for (int OpIdx : OpIndices) {
3378 if (OpIdx == -1) break;
3379
3380 const MCOperand &MO = Inst.getOperand(OpIdx);
3381 if (usesConstantBus(Inst, OpIdx)) {
3382 if (MO.isReg()) {
3383 LastSGPR = mc2PseudoReg(MO.getReg());
3384 // Pairs of registers with a partial intersection like these
3385 // s0, s[0:1]
3386 // flat_scratch_lo, flat_scratch
3387 // flat_scratch_lo, flat_scratch_hi
3388 // are theoretically valid but they are disabled anyway.
3389 // Note that this code mimics SIInstrInfo::verifyInstruction
3390 if (!SGPRsUsed.count(LastSGPR)) {
3391 SGPRsUsed.insert(LastSGPR);
3392 ++ConstantBusUseCount;
3393 }
3394 } else { // Expression or a literal
3395
3396 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3397 continue; // special operand like VINTERP attr_chan
3398
3399 // An instruction may use only one literal.
3400 // This has been validated in the previous step.
3401 // See validateVOPLiteral.
3402 // This literal may be used as more than one operand.
3403 // If all these operands are of the same size,
3404 // this literal counts as one scalar value.
3405 // Otherwise it counts as 2 scalar values.
3406 // See "GFX10 Shader Programming", section 3.6.2.3.
3407
3408 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3409 if (Size < 4) Size = 4;
3410
3411 if (NumLiterals == 0) {
3412 NumLiterals = 1;
3413 LiteralSize = Size;
3414 } else if (LiteralSize != Size) {
3415 NumLiterals = 2;
3416 }
3417 }
3418 }
3419 }
3420 }
3421 ConstantBusUseCount += NumLiterals;
3422
3423 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3424 return true;
3425
3426 SMLoc LitLoc = getLitLoc(Operands);
3427 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3428 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3429 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3430 return false; 3431 } 3432 3433 bool 3434 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3435 const OperandVector &Operands) { 3436 const unsigned Opcode = Inst.getOpcode(); 3437 const MCInstrDesc &Desc = MII.get(Opcode); 3438 3439 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3440 if (DstIdx == -1 || 3441 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3442 return true; 3443 } 3444 3445 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3446 3447 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3448 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3449 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3450 3451 assert(DstIdx != -1); 3452 const MCOperand &Dst = Inst.getOperand(DstIdx); 3453 assert(Dst.isReg()); 3454 3455 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3456 3457 for (int SrcIdx : SrcIndices) { 3458 if (SrcIdx == -1) break; 3459 const MCOperand &Src = Inst.getOperand(SrcIdx); 3460 if (Src.isReg()) { 3461 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3462 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3463 Error(getRegLoc(SrcReg, Operands), 3464 "destination must be different than all sources"); 3465 return false; 3466 } 3467 } 3468 } 3469 3470 return true; 3471 } 3472 3473 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3474 3475 const unsigned Opc = Inst.getOpcode(); 3476 const MCInstrDesc &Desc = MII.get(Opc); 3477 3478 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3479 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3480 assert(ClampIdx != -1); 3481 return Inst.getOperand(ClampIdx).getImm() == 0; 3482 } 3483 3484 return true; 3485 } 3486 3487 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3488 3489 const unsigned Opc = Inst.getOpcode(); 3490 const MCInstrDesc &Desc = MII.get(Opc); 3491 3492 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3493 return true; 3494 3495 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3496 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3497 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3498 3499 assert(VDataIdx != -1); 3500 3501 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3502 return true; 3503 3504 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3505 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3506 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3507 if (DMask == 0) 3508 DMask = 1; 3509 3510 unsigned DataSize = 3511 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3512 if (hasPackedD16()) { 3513 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3514 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3515 DataSize = (DataSize + 1) / 2; 3516 } 3517 3518 return (VDataSize / 4) == DataSize + TFESize; 3519 } 3520 3521 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3522 const unsigned Opc = Inst.getOpcode(); 3523 const MCInstrDesc &Desc = MII.get(Opc); 3524 3525 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3526 return true; 3527 3528 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3529 3530 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3531 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3532 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3533 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3534 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3535 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3536 3537 assert(VAddr0Idx != -1); 3538 assert(SrsrcIdx != -1); 3539 assert(SrsrcIdx > VAddr0Idx); 3540 3541 if (DimIdx == -1) 3542 return true; // intersect_ray 3543 3544 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3545 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3546 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3547 unsigned ActualAddrSize = 3548 IsNSA ? SrsrcIdx - VAddr0Idx 3549 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3550 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3551 3552 unsigned ExpectedAddrSize = 3553 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3554 3555 if (!IsNSA) { 3556 if (ExpectedAddrSize > 8) 3557 ExpectedAddrSize = 16; 3558 3559 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3560 // This provides backward compatibility for assembly created 3561 // before 160b/192b/224b types were directly supported. 3562 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3563 return true; 3564 } 3565 3566 return ActualAddrSize == ExpectedAddrSize; 3567 } 3568 3569 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3570 3571 const unsigned Opc = Inst.getOpcode(); 3572 const MCInstrDesc &Desc = MII.get(Opc); 3573 3574 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3575 return true; 3576 if (!Desc.mayLoad() || !Desc.mayStore()) 3577 return true; // Not atomic 3578 3579 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3580 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3581 3582 // This is an incomplete check because image_atomic_cmpswap 3583 // may only use 0x3 and 0xf while other atomic operations 3584 // may use 0x1 and 0x3. However these limitations are 3585 // verified when we check that dmask matches dst size. 3586 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3587 } 3588 3589 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3590 3591 const unsigned Opc = Inst.getOpcode(); 3592 const MCInstrDesc &Desc = MII.get(Opc); 3593 3594 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3595 return true; 3596 3597 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3598 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3599 3600 // GATHER4 instructions use dmask in a different fashion compared to 3601 // other MIMG instructions. The only useful DMASK values are 3602 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3603 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3604 // this. 3605 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3606 } 3607 3608 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3609 const unsigned Opc = Inst.getOpcode(); 3610 const MCInstrDesc &Desc = MII.get(Opc); 3611 3612 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3613 return true; 3614 3615 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3616 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3617 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3618 3619 if (!BaseOpcode->MSAA) 3620 return true; 3621 3622 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3623 assert(DimIdx != -1); 3624 3625 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3626 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3627 3628 return DimInfo->MSAA; 3629 } 3630 3631 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3632 { 3633 switch (Opcode) { 3634 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3635 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3636 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3637 return true; 3638 default: 3639 return false; 3640 } 3641 } 3642 3643 // movrels* opcodes should only allow VGPRS as src0. 3644 // This is specified in .td description for vop1/vop3, 3645 // but sdwa is handled differently. See isSDWAOperand. 3646 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3647 const OperandVector &Operands) { 3648 3649 const unsigned Opc = Inst.getOpcode(); 3650 const MCInstrDesc &Desc = MII.get(Opc); 3651 3652 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3653 return true; 3654 3655 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3656 assert(Src0Idx != -1); 3657 3658 SMLoc ErrLoc; 3659 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3660 if (Src0.isReg()) { 3661 auto Reg = mc2PseudoReg(Src0.getReg()); 3662 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3663 if (!isSGPR(Reg, TRI)) 3664 return true; 3665 ErrLoc = getRegLoc(Reg, Operands); 3666 } else { 3667 ErrLoc = getConstLoc(Operands); 3668 } 3669 3670 Error(ErrLoc, "source operand must be a VGPR"); 3671 return false; 3672 } 3673 3674 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3675 const OperandVector &Operands) { 3676 3677 const unsigned Opc = Inst.getOpcode(); 3678 3679 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3680 return true; 3681 3682 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3683 assert(Src0Idx != -1); 3684 3685 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3686 if (!Src0.isReg()) 3687 return true; 3688 3689 auto Reg = mc2PseudoReg(Src0.getReg()); 3690 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3691 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3692 Error(getRegLoc(Reg, Operands), 3693 "source operand must be either a VGPR or an inline constant"); 3694 return false; 3695 } 3696 3697 return true; 3698 } 3699 3700 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3701 const OperandVector &Operands) { 3702 const unsigned Opc = Inst.getOpcode(); 3703 const MCInstrDesc &Desc = MII.get(Opc); 3704 3705 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3706 return true; 3707 3708 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3709 if (Src2Idx == -1) 3710 return true; 3711 3712 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3713 if (!Src2.isReg()) 3714 return true; 3715 3716 MCRegister Src2Reg = Src2.getReg(); 3717 MCRegister DstReg = 
Inst.getOperand(0).getReg(); 3718 if (Src2Reg == DstReg) 3719 return true; 3720 3721 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3722 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3723 return true; 3724 3725 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3726 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3727 "source 2 operand must not partially overlap with dst"); 3728 return false; 3729 } 3730 3731 return true; 3732 } 3733 3734 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3735 switch (Inst.getOpcode()) { 3736 default: 3737 return true; 3738 case V_DIV_SCALE_F32_gfx6_gfx7: 3739 case V_DIV_SCALE_F32_vi: 3740 case V_DIV_SCALE_F32_gfx10: 3741 case V_DIV_SCALE_F64_gfx6_gfx7: 3742 case V_DIV_SCALE_F64_vi: 3743 case V_DIV_SCALE_F64_gfx10: 3744 break; 3745 } 3746 3747 // TODO: Check that src0 = src1 or src2. 3748 3749 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3750 AMDGPU::OpName::src1_modifiers, 3751 AMDGPU::OpName::src2_modifiers}) { 3752 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3753 .getImm() & 3754 SISrcMods::ABS) { 3755 return false; 3756 } 3757 } 3758 3759 return true; 3760 } 3761 3762 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3763 3764 const unsigned Opc = Inst.getOpcode(); 3765 const MCInstrDesc &Desc = MII.get(Opc); 3766 3767 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3768 return true; 3769 3770 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3771 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3772 if (isCI() || isSI()) 3773 return false; 3774 } 3775 3776 return true; 3777 } 3778 3779 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3780 const unsigned Opc = Inst.getOpcode(); 3781 const MCInstrDesc &Desc = MII.get(Opc); 3782 3783 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3784 return true; 3785 3786 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3787 if (DimIdx < 0) 3788 return true; 3789 3790 long Imm = Inst.getOperand(DimIdx).getImm(); 3791 if (Imm < 0 || Imm >= 8) 3792 return false; 3793 3794 return true; 3795 } 3796 3797 static bool IsRevOpcode(const unsigned Opcode) 3798 { 3799 switch (Opcode) { 3800 case AMDGPU::V_SUBREV_F32_e32: 3801 case AMDGPU::V_SUBREV_F32_e64: 3802 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3803 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3804 case AMDGPU::V_SUBREV_F32_e32_vi: 3805 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3806 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3807 case AMDGPU::V_SUBREV_F32_e64_vi: 3808 3809 case AMDGPU::V_SUBREV_CO_U32_e32: 3810 case AMDGPU::V_SUBREV_CO_U32_e64: 3811 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3812 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3813 3814 case AMDGPU::V_SUBBREV_U32_e32: 3815 case AMDGPU::V_SUBBREV_U32_e64: 3816 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3817 case AMDGPU::V_SUBBREV_U32_e32_vi: 3818 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3819 case AMDGPU::V_SUBBREV_U32_e64_vi: 3820 3821 case AMDGPU::V_SUBREV_U32_e32: 3822 case AMDGPU::V_SUBREV_U32_e64: 3823 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3824 case AMDGPU::V_SUBREV_U32_e32_vi: 3825 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3826 case AMDGPU::V_SUBREV_U32_e64_vi: 3827 3828 case AMDGPU::V_SUBREV_F16_e32: 3829 case AMDGPU::V_SUBREV_F16_e64: 3830 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3831 case AMDGPU::V_SUBREV_F16_e32_vi: 3832 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3833 case AMDGPU::V_SUBREV_F16_e64_vi: 3834 3835 case AMDGPU::V_SUBREV_U16_e32: 3836 case AMDGPU::V_SUBREV_U16_e64: 3837 case
AMDGPU::V_SUBREV_U16_e32_vi: 3838 case AMDGPU::V_SUBREV_U16_e64_vi: 3839 3840 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3841 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3842 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3843 3844 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3845 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3846 3847 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3848 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3849 3850 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3851 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3852 3853 case AMDGPU::V_LSHRREV_B32_e32: 3854 case AMDGPU::V_LSHRREV_B32_e64: 3855 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3856 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3857 case AMDGPU::V_LSHRREV_B32_e32_vi: 3858 case AMDGPU::V_LSHRREV_B32_e64_vi: 3859 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3860 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3861 3862 case AMDGPU::V_ASHRREV_I32_e32: 3863 case AMDGPU::V_ASHRREV_I32_e64: 3864 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3865 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3866 case AMDGPU::V_ASHRREV_I32_e32_vi: 3867 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3868 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3869 case AMDGPU::V_ASHRREV_I32_e64_vi: 3870 3871 case AMDGPU::V_LSHLREV_B32_e32: 3872 case AMDGPU::V_LSHLREV_B32_e64: 3873 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3874 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3875 case AMDGPU::V_LSHLREV_B32_e32_vi: 3876 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3877 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3878 case AMDGPU::V_LSHLREV_B32_e64_vi: 3879 3880 case AMDGPU::V_LSHLREV_B16_e32: 3881 case AMDGPU::V_LSHLREV_B16_e64: 3882 case AMDGPU::V_LSHLREV_B16_e32_vi: 3883 case AMDGPU::V_LSHLREV_B16_e64_vi: 3884 case AMDGPU::V_LSHLREV_B16_gfx10: 3885 3886 case AMDGPU::V_LSHRREV_B16_e32: 3887 case AMDGPU::V_LSHRREV_B16_e64: 3888 case AMDGPU::V_LSHRREV_B16_e32_vi: 3889 case AMDGPU::V_LSHRREV_B16_e64_vi: 3890 case AMDGPU::V_LSHRREV_B16_gfx10: 3891 3892 case AMDGPU::V_ASHRREV_I16_e32: 3893 case AMDGPU::V_ASHRREV_I16_e64: 3894 case AMDGPU::V_ASHRREV_I16_e32_vi: 3895 case AMDGPU::V_ASHRREV_I16_e64_vi: 3896 case AMDGPU::V_ASHRREV_I16_gfx10: 3897 3898 case AMDGPU::V_LSHLREV_B64_e64: 3899 case AMDGPU::V_LSHLREV_B64_gfx10: 3900 case AMDGPU::V_LSHLREV_B64_vi: 3901 3902 case AMDGPU::V_LSHRREV_B64_e64: 3903 case AMDGPU::V_LSHRREV_B64_gfx10: 3904 case AMDGPU::V_LSHRREV_B64_vi: 3905 3906 case AMDGPU::V_ASHRREV_I64_e64: 3907 case AMDGPU::V_ASHRREV_I64_gfx10: 3908 case AMDGPU::V_ASHRREV_I64_vi: 3909 3910 case AMDGPU::V_PK_LSHLREV_B16: 3911 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3912 case AMDGPU::V_PK_LSHLREV_B16_vi: 3913 3914 case AMDGPU::V_PK_LSHRREV_B16: 3915 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3916 case AMDGPU::V_PK_LSHRREV_B16_vi: 3917 case AMDGPU::V_PK_ASHRREV_I16: 3918 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3919 case AMDGPU::V_PK_ASHRREV_I16_vi: 3920 return true; 3921 default: 3922 return false; 3923 } 3924 } 3925 3926 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3927 3928 using namespace SIInstrFlags; 3929 const unsigned Opcode = Inst.getOpcode(); 3930 const MCInstrDesc &Desc = MII.get(Opcode); 3931 3932 // lds_direct register is defined so that it can be used 3933 // with 9-bit operands only. Ignore encodings which do not accept these. 
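// A few illustrative examples of the checks below (hypothetical input,
// assuming a subtarget where lds_direct is available):
//   v_mov_b32 v0, lds_direct         -> accepted, lds_direct is src0 of a VOP1 encoding
//   v_add_f32 v0, v1, lds_direct     -> rejected, lds_direct may be used as src0 only
//   v_subrev_f32 v0, lds_direct, v1  -> rejected, rev opcodes and SDWA are excluded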
3934 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3935 if ((Desc.TSFlags & Enc) == 0) 3936 return None; 3937 3938 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3939 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3940 if (SrcIdx == -1) 3941 break; 3942 const auto &Src = Inst.getOperand(SrcIdx); 3943 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3944 3945 if (isGFX90A()) 3946 return StringRef("lds_direct is not supported on this GPU"); 3947 3948 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3949 return StringRef("lds_direct cannot be used with this instruction"); 3950 3951 if (SrcName != OpName::src0) 3952 return StringRef("lds_direct may be used as src0 only"); 3953 } 3954 } 3955 3956 return None; 3957 } 3958 3959 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3960 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3961 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3962 if (Op.isFlatOffset()) 3963 return Op.getStartLoc(); 3964 } 3965 return getLoc(); 3966 } 3967 3968 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3969 const OperandVector &Operands) { 3970 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3971 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3972 return true; 3973 3974 auto Opcode = Inst.getOpcode(); 3975 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3976 assert(OpNum != -1); 3977 3978 const auto &Op = Inst.getOperand(OpNum); 3979 if (!hasFlatOffsets() && Op.getImm() != 0) { 3980 Error(getFlatOffsetLoc(Operands), 3981 "flat offset modifier is not supported on this GPU"); 3982 return false; 3983 } 3984 3985 // For FLAT segment the offset must be positive; 3986 // MSB is ignored and forced to zero. 3987 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3988 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3989 if (!isIntN(OffsetSize, Op.getImm())) { 3990 Error(getFlatOffsetLoc(Operands), 3991 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3992 return false; 3993 } 3994 } else { 3995 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3996 if (!isUIntN(OffsetSize, Op.getImm())) { 3997 Error(getFlatOffsetLoc(Operands), 3998 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3999 return false; 4000 } 4001 } 4002 4003 return true; 4004 } 4005 4006 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4007 // Start with second operand because SMEM Offset cannot be dst or src0. 
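// For example, in "s_load_dword s5, s[2:3], 0x10" the parsed operands are the
// mnemonic token, the dst (s5), the base (s[2:3]) and the offset (0x10), so
// the offset can never be found in slots 0 or 1.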
4008 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4009 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4010 if (Op.isSMEMOffset()) 4011 return Op.getStartLoc(); 4012 } 4013 return getLoc(); 4014 } 4015 4016 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4017 const OperandVector &Operands) { 4018 if (isCI() || isSI()) 4019 return true; 4020 4021 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4022 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4023 return true; 4024 4025 auto Opcode = Inst.getOpcode(); 4026 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4027 if (OpNum == -1) 4028 return true; 4029 4030 const auto &Op = Inst.getOperand(OpNum); 4031 if (!Op.isImm()) 4032 return true; 4033 4034 uint64_t Offset = Op.getImm(); 4035 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4036 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4037 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4038 return true; 4039 4040 Error(getSMEMOffsetLoc(Operands), 4041 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4042 "expected a 21-bit signed offset"); 4043 4044 return false; 4045 } 4046 4047 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4048 unsigned Opcode = Inst.getOpcode(); 4049 const MCInstrDesc &Desc = MII.get(Opcode); 4050 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4051 return true; 4052 4053 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4054 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4055 4056 const int OpIndices[] = { Src0Idx, Src1Idx }; 4057 4058 unsigned NumExprs = 0; 4059 unsigned NumLiterals = 0; 4060 uint32_t LiteralValue; 4061 4062 for (int OpIdx : OpIndices) { 4063 if (OpIdx == -1) break; 4064 4065 const MCOperand &MO = Inst.getOperand(OpIdx); 4066 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4067 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4068 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4069 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4070 if (NumLiterals == 0 || LiteralValue != Value) { 4071 LiteralValue = Value; 4072 ++NumLiterals; 4073 } 4074 } else if (MO.isExpr()) { 4075 ++NumExprs; 4076 } 4077 } 4078 } 4079 4080 return NumLiterals + NumExprs <= 1; 4081 } 4082 4083 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4084 const unsigned Opc = Inst.getOpcode(); 4085 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4086 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4087 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4088 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4089 4090 if (OpSel & ~3) 4091 return false; 4092 } 4093 4094 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4095 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4096 if (OpSelIdx != -1) { 4097 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4098 return false; 4099 } 4100 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4101 if (OpSelHiIdx != -1) { 4102 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4103 return false; 4104 } 4105 } 4106 4107 return true; 4108 } 4109 4110 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4111 const OperandVector &Operands) { 4112 const unsigned Opc = Inst.getOpcode(); 4113 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4114 if (DppCtrlIdx < 0) 4115 return true; 4116 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4117 4118 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4119 // DPP64 is supported for row_newbcast only. 4120 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4121 if (Src0Idx >= 0 && 4122 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4123 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4124 Error(S, "64 bit dpp only supports row_newbcast"); 4125 return false; 4126 } 4127 } 4128 4129 return true; 4130 } 4131 4132 // Check if VCC register matches wavefront size 4133 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4134 auto FB = getFeatureBits(); 4135 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4136 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4137 } 4138 4139 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4140 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4141 const OperandVector &Operands) { 4142 unsigned Opcode = Inst.getOpcode(); 4143 const MCInstrDesc &Desc = MII.get(Opcode); 4144 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4145 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4146 ImmIdx == -1) 4147 return true; 4148 4149 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4150 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4151 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4152 4153 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4154 4155 unsigned NumExprs = 0; 4156 unsigned NumLiterals = 0; 4157 uint32_t LiteralValue; 4158 4159 for (int OpIdx : OpIndices) { 4160 if (OpIdx == -1) 4161 continue; 4162 4163 const MCOperand &MO = Inst.getOperand(OpIdx); 4164 if (!MO.isImm() && !MO.isExpr()) 4165 continue; 4166 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4167 continue; 4168 4169 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4170 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4171 Error(getConstLoc(Operands), 4172 "inline constants are not allowed for this operand"); 4173 return false; 4174 } 4175 4176 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4177 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4178 if (NumLiterals == 0 || LiteralValue != Value) { 4179 LiteralValue = Value; 4180 ++NumLiterals; 4181 } 4182 } else if (MO.isExpr()) { 4183 ++NumExprs; 4184 } 4185 } 4186 NumLiterals += NumExprs; 4187 4188 if (!NumLiterals) 4189 return true; 4190 4191 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4192 Error(getLitLoc(Operands), "literal operands are not supported"); 4193 return false; 4194 } 4195 4196 if (NumLiterals > 1) { 4197 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4198 return false; 4199 } 4200 4201 return true; 4202 } 4203 4204 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4205 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4206 const MCRegisterInfo *MRI) { 4207 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4208 if (OpIdx < 0) 4209 return -1; 4210 4211 const MCOperand &Op = Inst.getOperand(OpIdx); 4212 if (!Op.isReg()) 4213 return -1; 4214 4215 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4216 auto Reg = Sub ? Sub : Op.getReg(); 4217 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4218 return AGPR32.contains(Reg) ? 
1 : 0; 4219 } 4220 4221 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4222 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4223 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4224 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4225 SIInstrFlags::DS)) == 0) 4226 return true; 4227 4228 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4229 : AMDGPU::OpName::vdata; 4230 4231 const MCRegisterInfo *MRI = getMRI(); 4232 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4233 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4234 4235 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4236 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4237 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4238 return false; 4239 } 4240 4241 auto FB = getFeatureBits(); 4242 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4243 if (DataAreg < 0 || DstAreg < 0) 4244 return true; 4245 return DstAreg == DataAreg; 4246 } 4247 4248 return DstAreg < 1 && DataAreg < 1; 4249 } 4250 4251 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4252 auto FB = getFeatureBits(); 4253 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4254 return true; 4255 4256 const MCRegisterInfo *MRI = getMRI(); 4257 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4258 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4259 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4260 const MCOperand &Op = Inst.getOperand(I); 4261 if (!Op.isReg()) 4262 continue; 4263 4264 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4265 if (!Sub) 4266 continue; 4267 4268 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4269 return false; 4270 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4271 return false; 4272 } 4273 4274 return true; 4275 } 4276 4277 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4278 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4279 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4280 if (Op.isBLGP()) 4281 return Op.getStartLoc(); 4282 } 4283 return SMLoc(); 4284 } 4285 4286 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4287 const OperandVector &Operands) { 4288 unsigned Opc = Inst.getOpcode(); 4289 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4290 if (BlgpIdx == -1) 4291 return true; 4292 SMLoc BLGPLoc = getBLGPLoc(Operands); 4293 if (!BLGPLoc.isValid()) 4294 return true; 4295 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4296 auto FB = getFeatureBits(); 4297 bool UsesNeg = false; 4298 if (FB[AMDGPU::FeatureGFX940Insts]) { 4299 switch (Opc) { 4300 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4301 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4302 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4303 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4304 UsesNeg = true; 4305 } 4306 } 4307 4308 if (IsNeg == UsesNeg) 4309 return true; 4310 4311 Error(BLGPLoc, 4312 UsesNeg ? "invalid modifier: blgp is not supported" 4313 : "invalid modifier: neg is not supported"); 4314 4315 return false; 4316 } 4317 4318 // gfx90a has an undocumented limitation: 4319 // DS_GWS opcodes must use even aligned registers. 
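// For example (illustrative, gfx90a):
//   ds_gws_init v2 gds   -> accepted, data register is even aligned
//   ds_gws_init v3 gds   -> rejected with "vgpr must be even aligned"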
4320 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4321 const OperandVector &Operands) { 4322 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4323 return true; 4324 4325 int Opc = Inst.getOpcode(); 4326 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4327 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4328 return true; 4329 4330 const MCRegisterInfo *MRI = getMRI(); 4331 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4332 int Data0Pos = 4333 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4334 assert(Data0Pos != -1); 4335 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4336 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4337 if (RegIdx & 1) { 4338 SMLoc RegLoc = getRegLoc(Reg, Operands); 4339 Error(RegLoc, "vgpr must be even aligned"); 4340 return false; 4341 } 4342 4343 return true; 4344 } 4345 4346 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4347 const OperandVector &Operands, 4348 const SMLoc &IDLoc) { 4349 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4350 AMDGPU::OpName::cpol); 4351 if (CPolPos == -1) 4352 return true; 4353 4354 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4355 4356 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4357 if ((TSFlags & (SIInstrFlags::SMRD)) && 4358 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4359 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4360 return false; 4361 } 4362 4363 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4364 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4365 StringRef CStr(S.getPointer()); 4366 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4367 Error(S, "scc is not supported on this GPU"); 4368 return false; 4369 } 4370 4371 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4372 return true; 4373 4374 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4375 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4376 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4377 : "instruction must use glc"); 4378 return false; 4379 } 4380 } else { 4381 if (CPol & CPol::GLC) { 4382 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4383 StringRef CStr(S.getPointer()); 4384 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4385 Error(S, isGFX940() ? 
"instruction must not use sc0" 4386 : "instruction must not use glc"); 4387 return false; 4388 } 4389 } 4390 4391 return true; 4392 } 4393 4394 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4395 const SMLoc &IDLoc, 4396 const OperandVector &Operands) { 4397 if (auto ErrMsg = validateLdsDirect(Inst)) { 4398 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4399 return false; 4400 } 4401 if (!validateSOPLiteral(Inst)) { 4402 Error(getLitLoc(Operands), 4403 "only one literal operand is allowed"); 4404 return false; 4405 } 4406 if (!validateVOPLiteral(Inst, Operands)) { 4407 return false; 4408 } 4409 if (!validateConstantBusLimitations(Inst, Operands)) { 4410 return false; 4411 } 4412 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4413 return false; 4414 } 4415 if (!validateIntClampSupported(Inst)) { 4416 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4417 "integer clamping is not supported on this GPU"); 4418 return false; 4419 } 4420 if (!validateOpSel(Inst)) { 4421 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4422 "invalid op_sel operand"); 4423 return false; 4424 } 4425 if (!validateDPP(Inst, Operands)) { 4426 return false; 4427 } 4428 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4429 if (!validateMIMGD16(Inst)) { 4430 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4431 "d16 modifier is not supported on this GPU"); 4432 return false; 4433 } 4434 if (!validateMIMGDim(Inst)) { 4435 Error(IDLoc, "dim modifier is required on this GPU"); 4436 return false; 4437 } 4438 if (!validateMIMGMSAA(Inst)) { 4439 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4440 "invalid dim; must be MSAA type"); 4441 return false; 4442 } 4443 if (!validateMIMGDataSize(Inst)) { 4444 Error(IDLoc, 4445 "image data size does not match dmask and tfe"); 4446 return false; 4447 } 4448 if (!validateMIMGAddrSize(Inst)) { 4449 Error(IDLoc, 4450 "image address size does not match dim and a16"); 4451 return false; 4452 } 4453 if (!validateMIMGAtomicDMask(Inst)) { 4454 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4455 "invalid atomic image dmask"); 4456 return false; 4457 } 4458 if (!validateMIMGGatherDMask(Inst)) { 4459 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4460 "invalid image_gather dmask: only one bit must be set"); 4461 return false; 4462 } 4463 if (!validateMovrels(Inst, Operands)) { 4464 return false; 4465 } 4466 if (!validateFlatOffset(Inst, Operands)) { 4467 return false; 4468 } 4469 if (!validateSMEMOffset(Inst, Operands)) { 4470 return false; 4471 } 4472 if (!validateMAIAccWrite(Inst, Operands)) { 4473 return false; 4474 } 4475 if (!validateMFMA(Inst, Operands)) { 4476 return false; 4477 } 4478 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4479 return false; 4480 } 4481 4482 if (!validateAGPRLdSt(Inst)) { 4483 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4484 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4485 : "invalid register class: agpr loads and stores not supported on this GPU" 4486 ); 4487 return false; 4488 } 4489 if (!validateVGPRAlign(Inst)) { 4490 Error(IDLoc, 4491 "invalid register class: vgpr tuples must be 64 bit aligned"); 4492 return false; 4493 } 4494 if (!validateGWS(Inst, Operands)) { 4495 return false; 4496 } 4497 4498 if (!validateBLGP(Inst, Operands)) { 4499 return false; 4500 } 4501 4502 if (!validateDivScale(Inst)) { 4503 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4504 return false; 4505 } 4506 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4507 return false; 4508 } 4509 4510 return true; 4511 } 4512 4513 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4514 const FeatureBitset &FBS, 4515 unsigned VariantID = 0); 4516 4517 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4518 const FeatureBitset &AvailableFeatures, 4519 unsigned VariantID); 4520 4521 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4522 const FeatureBitset &FBS) { 4523 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4524 } 4525 4526 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4527 const FeatureBitset &FBS, 4528 ArrayRef<unsigned> Variants) { 4529 for (auto Variant : Variants) { 4530 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4531 return true; 4532 } 4533 4534 return false; 4535 } 4536 4537 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4538 const SMLoc &IDLoc) { 4539 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4540 4541 // Check if requested instruction variant is supported. 4542 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4543 return false; 4544 4545 // This instruction is not supported. 4546 // Clear any other pending errors because they are no longer relevant. 4547 getParser().clearPendingErrors(); 4548 4549 // Requested instruction variant is not supported. 4550 // Check if any other variants are supported. 4551 StringRef VariantName = getMatchedVariantName(); 4552 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4553 return Error(IDLoc, 4554 Twine(VariantName, 4555 " variant of this instruction is not supported")); 4556 } 4557 4558 // Finally check if this instruction is supported on any other GPU. 4559 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4560 return Error(IDLoc, "instruction not supported on this GPU"); 4561 } 4562 4563 // Instruction not supported on any GPU. Probably a typo. 4564 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4565 return Error(IDLoc, "invalid instruction" + Suggestion); 4566 } 4567 4568 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4569 OperandVector &Operands, 4570 MCStreamer &Out, 4571 uint64_t &ErrorInfo, 4572 bool MatchingInlineAsm) { 4573 MCInst Inst; 4574 unsigned Result = Match_Success; 4575 for (auto Variant : getMatchedVariants()) { 4576 uint64_t EI; 4577 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4578 Variant); 4579 // We order match statuses from least to most specific. 
We use most specific 4580 // status as resulting 4581 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4582 if ((R == Match_Success) || 4583 (R == Match_PreferE32) || 4584 (R == Match_MissingFeature && Result != Match_PreferE32) || 4585 (R == Match_InvalidOperand && Result != Match_MissingFeature 4586 && Result != Match_PreferE32) || 4587 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4588 && Result != Match_MissingFeature 4589 && Result != Match_PreferE32)) { 4590 Result = R; 4591 ErrorInfo = EI; 4592 } 4593 if (R == Match_Success) 4594 break; 4595 } 4596 4597 if (Result == Match_Success) { 4598 if (!validateInstruction(Inst, IDLoc, Operands)) { 4599 return true; 4600 } 4601 Inst.setLoc(IDLoc); 4602 Out.emitInstruction(Inst, getSTI()); 4603 return false; 4604 } 4605 4606 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4607 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4608 return true; 4609 } 4610 4611 switch (Result) { 4612 default: break; 4613 case Match_MissingFeature: 4614 // It has been verified that the specified instruction 4615 // mnemonic is valid. A match was found but it requires 4616 // features which are not supported on this GPU. 4617 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4618 4619 case Match_InvalidOperand: { 4620 SMLoc ErrorLoc = IDLoc; 4621 if (ErrorInfo != ~0ULL) { 4622 if (ErrorInfo >= Operands.size()) { 4623 return Error(IDLoc, "too few operands for instruction"); 4624 } 4625 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4626 if (ErrorLoc == SMLoc()) 4627 ErrorLoc = IDLoc; 4628 } 4629 return Error(ErrorLoc, "invalid operand for instruction"); 4630 } 4631 4632 case Match_PreferE32: 4633 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4634 "should be encoded as e32"); 4635 case Match_MnemonicFail: 4636 llvm_unreachable("Invalid instructions should have been handled already"); 4637 } 4638 llvm_unreachable("Implement any new match types added!"); 4639 } 4640 4641 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4642 int64_t Tmp = -1; 4643 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4644 return true; 4645 } 4646 if (getParser().parseAbsoluteExpression(Tmp)) { 4647 return true; 4648 } 4649 Ret = static_cast<uint32_t>(Tmp); 4650 return false; 4651 } 4652 4653 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4654 uint32_t &Minor) { 4655 if (ParseAsAbsoluteExpression(Major)) 4656 return TokError("invalid major version"); 4657 4658 if (!trySkipToken(AsmToken::Comma)) 4659 return TokError("minor version number required, comma expected"); 4660 4661 if (ParseAsAbsoluteExpression(Minor)) 4662 return TokError("invalid minor version"); 4663 4664 return false; 4665 } 4666 4667 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4668 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4669 return TokError("directive only supported for amdgcn architecture"); 4670 4671 std::string TargetIDDirective; 4672 SMLoc TargetStart = getTok().getLoc(); 4673 if (getParser().parseEscapedString(TargetIDDirective)) 4674 return true; 4675 4676 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4677 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4678 return getParser().Error(TargetRange.Start, 4679 (Twine(".amdgcn_target directive's target id ") + 4680 Twine(TargetIDDirective) + 4681 Twine(" does not match the specified target id ") + 4682 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4683 4684 return false; 4685 } 4686 4687 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4688 return Error(Range.Start, "value out of range", Range); 4689 } 4690 4691 bool AMDGPUAsmParser::calculateGPRBlocks( 4692 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4693 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4694 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4695 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4696 // TODO(scott.linder): These calculations are duplicated from 4697 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4698 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4699 4700 unsigned NumVGPRs = NextFreeVGPR; 4701 unsigned NumSGPRs = NextFreeSGPR; 4702 4703 if (Version.Major >= 10) 4704 NumSGPRs = 0; 4705 else { 4706 unsigned MaxAddressableNumSGPRs = 4707 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4708 4709 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4710 NumSGPRs > MaxAddressableNumSGPRs) 4711 return OutOfRangeError(SGPRRange); 4712 4713 NumSGPRs += 4714 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4715 4716 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4717 NumSGPRs > MaxAddressableNumSGPRs) 4718 return OutOfRangeError(SGPRRange); 4719 4720 if (Features.test(FeatureSGPRInitBug)) 4721 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4722 } 4723 4724 VGPRBlocks = 4725 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4726 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4727 4728 return false; 4729 } 4730 4731 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4732 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4733 return TokError("directive only supported for amdgcn architecture"); 4734 4735 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4736 return TokError("directive only supported for amdhsa OS"); 4737 4738 StringRef KernelName; 4739 if (getParser().parseIdentifier(KernelName)) 4740 return true; 4741 4742 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4743 4744 StringSet<> Seen; 4745 4746 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4747 4748 SMRange VGPRRange; 4749 uint64_t NextFreeVGPR = 0; 4750 uint64_t AccumOffset = 0; 4751 uint64_t SharedVGPRCount = 0; 4752 SMRange SGPRRange; 4753 uint64_t NextFreeSGPR = 0; 4754 4755 // Count the number of user SGPRs implied from the enabled feature bits. 4756 unsigned ImpliedUserSGPRCount = 0; 4757 4758 // Track if the asm explicitly contains the directive for the user SGPR 4759 // count. 
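// For example, enabling .amdhsa_user_sgpr_queue_ptr and
// .amdhsa_user_sgpr_kernarg_segment_ptr implies 2 + 2 = 4 user SGPRs; an
// explicit .amdhsa_user_sgpr_count smaller than the implied total is
// rejected after the directive loop below.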
4760 Optional<unsigned> ExplicitUserSGPRCount; 4761 bool ReserveVCC = true; 4762 bool ReserveFlatScr = true; 4763 Optional<bool> EnableWavefrontSize32; 4764 4765 while (true) { 4766 while (trySkipToken(AsmToken::EndOfStatement)); 4767 4768 StringRef ID; 4769 SMRange IDRange = getTok().getLocRange(); 4770 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4771 return true; 4772 4773 if (ID == ".end_amdhsa_kernel") 4774 break; 4775 4776 if (Seen.find(ID) != Seen.end()) 4777 return TokError(".amdhsa_ directives cannot be repeated"); 4778 Seen.insert(ID); 4779 4780 SMLoc ValStart = getLoc(); 4781 int64_t IVal; 4782 if (getParser().parseAbsoluteExpression(IVal)) 4783 return true; 4784 SMLoc ValEnd = getLoc(); 4785 SMRange ValRange = SMRange(ValStart, ValEnd); 4786 4787 if (IVal < 0) 4788 return OutOfRangeError(ValRange); 4789 4790 uint64_t Val = IVal; 4791 4792 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4793 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4794 return OutOfRangeError(RANGE); \ 4795 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4796 4797 if (ID == ".amdhsa_group_segment_fixed_size") { 4798 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4799 return OutOfRangeError(ValRange); 4800 KD.group_segment_fixed_size = Val; 4801 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4802 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4803 return OutOfRangeError(ValRange); 4804 KD.private_segment_fixed_size = Val; 4805 } else if (ID == ".amdhsa_kernarg_size") { 4806 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4807 return OutOfRangeError(ValRange); 4808 KD.kernarg_size = Val; 4809 } else if (ID == ".amdhsa_user_sgpr_count") { 4810 ExplicitUserSGPRCount = Val; 4811 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4812 if (hasArchitectedFlatScratch()) 4813 return Error(IDRange.Start, 4814 "directive is not supported with architected flat scratch", 4815 IDRange); 4816 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4817 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4818 Val, ValRange); 4819 if (Val) 4820 ImpliedUserSGPRCount += 4; 4821 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4822 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4823 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4824 ValRange); 4825 if (Val) 4826 ImpliedUserSGPRCount += 2; 4827 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4828 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4829 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4830 ValRange); 4831 if (Val) 4832 ImpliedUserSGPRCount += 2; 4833 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4834 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4835 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4836 Val, ValRange); 4837 if (Val) 4838 ImpliedUserSGPRCount += 2; 4839 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4840 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4841 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4842 ValRange); 4843 if (Val) 4844 ImpliedUserSGPRCount += 2; 4845 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4846 if (hasArchitectedFlatScratch()) 4847 return Error(IDRange.Start, 4848 "directive is not supported with architected flat scratch", 4849 IDRange); 4850 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4851 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4852 ValRange); 4853 if (Val) 4854 ImpliedUserSGPRCount += 2; 4855 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4856 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4857 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4858 Val, ValRange); 4859 if (Val) 4860 ImpliedUserSGPRCount += 1; 4861 } else if (ID == ".amdhsa_wavefront_size32") { 4862 if (IVersion.Major < 10) 4863 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4864 EnableWavefrontSize32 = Val; 4865 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4866 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4867 Val, ValRange); 4868 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4869 if (hasArchitectedFlatScratch()) 4870 return Error(IDRange.Start, 4871 "directive is not supported with architected flat scratch", 4872 IDRange); 4873 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4874 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4875 } else if (ID == ".amdhsa_enable_private_segment") { 4876 if (!hasArchitectedFlatScratch()) 4877 return Error( 4878 IDRange.Start, 4879 "directive is not supported without architected flat scratch", 4880 IDRange); 4881 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4882 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4883 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4884 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4885 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4886 ValRange); 4887 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4888 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4889 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4890 ValRange); 4891 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4892 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4893 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4894 ValRange); 4895 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4896 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4897 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4898 ValRange); 4899 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4900 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4901 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4902 ValRange); 4903 } else if (ID == ".amdhsa_next_free_vgpr") { 4904 VGPRRange = ValRange; 4905 NextFreeVGPR = Val; 4906 } else if (ID == ".amdhsa_next_free_sgpr") { 4907 SGPRRange = ValRange; 4908 NextFreeSGPR = Val; 4909 } else if (ID == ".amdhsa_accum_offset") { 4910 if (!isGFX90A()) 4911 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4912 AccumOffset = Val; 4913 } else if (ID == ".amdhsa_reserve_vcc") { 4914 if (!isUInt<1>(Val)) 4915 return OutOfRangeError(ValRange); 4916 ReserveVCC = Val; 4917 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4918 if (IVersion.Major < 7) 4919 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4920 if (hasArchitectedFlatScratch()) 4921 return Error(IDRange.Start, 4922 "directive is not supported with architected flat scratch", 4923 IDRange); 4924 if (!isUInt<1>(Val)) 4925 return OutOfRangeError(ValRange); 4926 ReserveFlatScr = Val; 4927 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4928 if (IVersion.Major < 8) 4929 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4930 if (!isUInt<1>(Val)) 4931 return OutOfRangeError(ValRange); 4932 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4933 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4934 IDRange); 4935 } else if (ID == ".amdhsa_float_round_mode_32") { 4936 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4937 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4938 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4939 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4940 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4941 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4942 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4943 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4944 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4945 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4946 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4947 ValRange); 4948 } else if (ID == ".amdhsa_dx10_clamp") { 4949 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4950 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4951 } else if (ID == ".amdhsa_ieee_mode") { 4952 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4953 Val, ValRange); 4954 } else if (ID == ".amdhsa_fp16_overflow") { 4955 if (IVersion.Major < 9) 4956 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4957 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4958 ValRange); 4959 } else if (ID == ".amdhsa_tg_split") { 4960 if (!isGFX90A()) 4961 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4963 ValRange); 4964 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4965 if (IVersion.Major < 10) 4966 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4967 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4968 ValRange); 4969 } else if (ID == ".amdhsa_memory_ordered") { 4970 if (IVersion.Major < 10) 4971 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4972 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4973 ValRange); 4974 } else if (ID == ".amdhsa_forward_progress") { 4975 if (IVersion.Major < 10) 4976 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4977 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4978 ValRange); 4979 } else if (ID == ".amdhsa_shared_vgpr_count") { 4980 if (IVersion.Major < 10) 4981 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4982 SharedVGPRCount = Val; 4983 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4984 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4985 ValRange); 4986 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4987 PARSE_BITS_ENTRY( 4988 KD.compute_pgm_rsrc2, 4989 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4990 ValRange); 4991 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4992 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4993 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4994 Val, ValRange); 4995 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4996 PARSE_BITS_ENTRY( 4997 KD.compute_pgm_rsrc2, 4998 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4999 ValRange); 5000 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5001 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5002 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5003 Val, ValRange); 5004 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5005 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5006 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5007 Val, ValRange); 5008 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5009 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5010 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5011 Val, ValRange); 5012 } else if (ID == ".amdhsa_exception_int_div_zero") { 5013 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5014 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5015 Val, ValRange); 5016 } else { 5017 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5018 } 5019 5020 #undef PARSE_BITS_ENTRY 5021 } 5022 5023 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5024 return TokError(".amdhsa_next_free_vgpr directive is required"); 5025 5026 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5027 return TokError(".amdhsa_next_free_sgpr directive is required"); 5028 5029 unsigned VGPRBlocks; 5030 unsigned SGPRBlocks; 5031 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5032 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5033 EnableWavefrontSize32, NextFreeVGPR, 5034 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5035 SGPRBlocks)) 5036 return true; 5037 5038 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5039 VGPRBlocks)) 5040 return OutOfRangeError(VGPRRange); 5041 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5042 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5043 5044 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5045 SGPRBlocks)) 5046 return OutOfRangeError(SGPRRange); 5047 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5048 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5049 SGPRBlocks); 5050 5051 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5052 return TokError("amdhsa_user_sgpr_count smaller than implied by " 5053 "enabled user SGPRs"); 5054 5055 unsigned UserSGPRCount = 5056 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5057 5058 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5059 return TokError("too many user SGPRs enabled"); 5060 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5061 UserSGPRCount); 5062 5063 if (isGFX90A()) { 5064 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5065 return TokError(".amdhsa_accum_offset directive is required"); 5066 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5067 return TokError("accum_offset should be in range [4..256] in " 5068 "increments of 4"); 5069 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5070 return TokError("accum_offset exceeds total VGPR allocation"); 5071 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5072 (AccumOffset / 4 - 1)); 5073 } 5074 5075 if (IVersion.Major == 10) { 5076 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5077 if (SharedVGPRCount && EnableWavefrontSize32) { 5078 return TokError("shared_vgpr_count directive not valid on " 5079 "wavefront size 32"); 5080 } 5081 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5082 return TokError("shared_vgpr_count*2 + " 5083 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5084 "exceed 63\n"); 5085 } 5086 } 5087 5088 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5089 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5090 ReserveFlatScr); 5091 return false; 5092 } 5093 5094 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5095 uint32_t Major; 5096 uint32_t Minor; 5097 5098 if (ParseDirectiveMajorMinor(Major, Minor)) 5099 return true; 5100 5101 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5102 return false; 5103 } 5104 5105 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5106 uint32_t Major; 5107 uint32_t Minor; 5108 uint32_t Stepping; 5109 StringRef VendorName; 5110 StringRef ArchName; 5111 5112 // If this directive has no
arguments, then use the ISA version for the 5113 // targeted GPU. 5114 if (isToken(AsmToken::EndOfStatement)) { 5115 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5116 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5117 ISA.Stepping, 5118 "AMD", "AMDGPU"); 5119 return false; 5120 } 5121 5122 if (ParseDirectiveMajorMinor(Major, Minor)) 5123 return true; 5124 5125 if (!trySkipToken(AsmToken::Comma)) 5126 return TokError("stepping version number required, comma expected"); 5127 5128 if (ParseAsAbsoluteExpression(Stepping)) 5129 return TokError("invalid stepping version"); 5130 5131 if (!trySkipToken(AsmToken::Comma)) 5132 return TokError("vendor name required, comma expected"); 5133 5134 if (!parseString(VendorName, "invalid vendor name")) 5135 return true; 5136 5137 if (!trySkipToken(AsmToken::Comma)) 5138 return TokError("arch name required, comma expected"); 5139 5140 if (!parseString(ArchName, "invalid arch name")) 5141 return true; 5142 5143 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5144 VendorName, ArchName); 5145 return false; 5146 } 5147 5148 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5149 amd_kernel_code_t &Header) { 5150 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5151 // assembly for backwards compatibility. 5152 if (ID == "max_scratch_backing_memory_byte_size") { 5153 Parser.eatToEndOfStatement(); 5154 return false; 5155 } 5156 5157 SmallString<40> ErrStr; 5158 raw_svector_ostream Err(ErrStr); 5159 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5160 return TokError(Err.str()); 5161 } 5162 Lex(); 5163 5164 if (ID == "enable_wavefront_size32") { 5165 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5166 if (!isGFX10Plus()) 5167 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5168 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5169 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5170 } else { 5171 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5172 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5173 } 5174 } 5175 5176 if (ID == "wavefront_size") { 5177 if (Header.wavefront_size == 5) { 5178 if (!isGFX10Plus()) 5179 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5180 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5181 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5182 } else if (Header.wavefront_size == 6) { 5183 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5184 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5185 } 5186 } 5187 5188 if (ID == "enable_wgp_mode") { 5189 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5190 !isGFX10Plus()) 5191 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5192 } 5193 5194 if (ID == "enable_mem_ordered") { 5195 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5196 !isGFX10Plus()) 5197 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5198 } 5199 5200 if (ID == "enable_fwd_progress") { 5201 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5202 !isGFX10Plus()) 5203 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5204 } 5205 5206 return false; 5207 } 5208 5209 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5210 amd_kernel_code_t Header; 5211 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5212 5213 while (true) { 
5214 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5215 // will set the current token to EndOfStatement. 5216 while(trySkipToken(AsmToken::EndOfStatement)); 5217 5218 StringRef ID; 5219 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5220 return true; 5221 5222 if (ID == ".end_amd_kernel_code_t") 5223 break; 5224 5225 if (ParseAMDKernelCodeTValue(ID, Header)) 5226 return true; 5227 } 5228 5229 getTargetStreamer().EmitAMDKernelCodeT(Header); 5230 5231 return false; 5232 } 5233 5234 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5235 StringRef KernelName; 5236 if (!parseId(KernelName, "expected symbol name")) 5237 return true; 5238 5239 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5240 ELF::STT_AMDGPU_HSA_KERNEL); 5241 5242 KernelScope.initialize(getContext()); 5243 return false; 5244 } 5245 5246 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5247 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5248 return Error(getLoc(), 5249 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5250 "architectures"); 5251 } 5252 5253 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5254 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5255 return Error(getParser().getTok().getLoc(), "target id must match options"); 5256 5257 getTargetStreamer().EmitISAVersion(); 5258 Lex(); 5259 5260 return false; 5261 } 5262 5263 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5264 const char *AssemblerDirectiveBegin; 5265 const char *AssemblerDirectiveEnd; 5266 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5267 isHsaAbiVersion3AndAbove(&getSTI()) 5268 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5269 HSAMD::V3::AssemblerDirectiveEnd) 5270 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5271 HSAMD::AssemblerDirectiveEnd); 5272 5273 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5274 return Error(getLoc(), 5275 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5276 "not available on non-amdhsa OSes")).str()); 5277 } 5278 5279 std::string HSAMetadataString; 5280 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5281 HSAMetadataString)) 5282 return true; 5283 5284 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5285 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5286 return Error(getLoc(), "invalid HSA metadata"); 5287 } else { 5288 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5289 return Error(getLoc(), "invalid HSA metadata"); 5290 } 5291 5292 return false; 5293 } 5294 5295 /// Common code to parse out a block of text (typically YAML) between start and 5296 /// end directives. 
5297 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5298 const char *AssemblerDirectiveEnd, 5299 std::string &CollectString) { 5300 5301 raw_string_ostream CollectStream(CollectString); 5302 5303 getLexer().setSkipSpace(false); 5304 5305 bool FoundEnd = false; 5306 while (!isToken(AsmToken::Eof)) { 5307 while (isToken(AsmToken::Space)) { 5308 CollectStream << getTokenStr(); 5309 Lex(); 5310 } 5311 5312 if (trySkipId(AssemblerDirectiveEnd)) { 5313 FoundEnd = true; 5314 break; 5315 } 5316 5317 CollectStream << Parser.parseStringToEndOfStatement() 5318 << getContext().getAsmInfo()->getSeparatorString(); 5319 5320 Parser.eatToEndOfStatement(); 5321 } 5322 5323 getLexer().setSkipSpace(true); 5324 5325 if (isToken(AsmToken::Eof) && !FoundEnd) { 5326 return TokError(Twine("expected directive ") + 5327 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5328 } 5329 5330 CollectStream.flush(); 5331 return false; 5332 } 5333 5334 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5335 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5336 std::string String; 5337 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5338 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5339 return true; 5340 5341 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5342 if (!PALMetadata->setFromString(String)) 5343 return Error(getLoc(), "invalid PAL metadata"); 5344 return false; 5345 } 5346 5347 /// Parse the assembler directive for old linear-format PAL metadata. 5348 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5349 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5350 return Error(getLoc(), 5351 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5352 "not available on non-amdpal OSes")).str()); 5353 } 5354 5355 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5356 PALMetadata->setLegacy(); 5357 for (;;) { 5358 uint32_t Key, Value; 5359 if (ParseAsAbsoluteExpression(Key)) { 5360 return TokError(Twine("invalid value in ") + 5361 Twine(PALMD::AssemblerDirective)); 5362 } 5363 if (!trySkipToken(AsmToken::Comma)) { 5364 return TokError(Twine("expected an even number of values in ") + 5365 Twine(PALMD::AssemblerDirective)); 5366 } 5367 if (ParseAsAbsoluteExpression(Value)) { 5368 return TokError(Twine("invalid value in ") + 5369 Twine(PALMD::AssemblerDirective)); 5370 } 5371 PALMetadata->setRegister(Key, Value); 5372 if (!trySkipToken(AsmToken::Comma)) 5373 break; 5374 } 5375 return false; 5376 } 5377 5378 /// ParseDirectiveAMDGPULDS 5379 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5380 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5381 if (getParser().checkForValidSection()) 5382 return true; 5383 5384 StringRef Name; 5385 SMLoc NameLoc = getLoc(); 5386 if (getParser().parseIdentifier(Name)) 5387 return TokError("expected identifier in directive"); 5388 5389 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5390 if (parseToken(AsmToken::Comma, "expected ','")) 5391 return true; 5392 5393 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5394 5395 int64_t Size; 5396 SMLoc SizeLoc = getLoc(); 5397 if (getParser().parseAbsoluteExpression(Size)) 5398 return true; 5399 if (Size < 0) 5400 return Error(SizeLoc, "size must be non-negative"); 5401 if (Size > LocalMemorySize) 5402 return Error(SizeLoc, "size is too large"); 5403 5404 int64_t Alignment = 4; 5405 if (trySkipToken(AsmToken::Comma)) { 5406 SMLoc AlignLoc = getLoc(); 5407 if 
(getParser().parseAbsoluteExpression(Alignment)) 5408 return true; 5409 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5410 return Error(AlignLoc, "alignment must be a power of two"); 5411 5412 // Alignment larger than the size of LDS is possible in theory, as long 5413 // as the linker manages to place the symbol at address 0, but we do want 5414 // to make sure the alignment fits nicely into a 32-bit integer. 5415 if (Alignment >= 1u << 31) 5416 return Error(AlignLoc, "alignment is too large"); 5417 } 5418 5419 if (parseToken(AsmToken::EndOfStatement, 5420 "unexpected token in '.amdgpu_lds' directive")) 5421 return true; 5422 5423 Symbol->redefineIfPossible(); 5424 if (!Symbol->isUndefined()) 5425 return Error(NameLoc, "invalid symbol redefinition"); 5426 5427 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5428 return false; 5429 } 5430 5431 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5432 StringRef IDVal = DirectiveID.getString(); 5433 5434 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5435 if (IDVal == ".amdhsa_kernel") 5436 return ParseDirectiveAMDHSAKernel(); 5437 5438 // TODO: Restructure/combine with PAL metadata directive. 5439 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5440 return ParseDirectiveHSAMetadata(); 5441 } else { 5442 if (IDVal == ".hsa_code_object_version") 5443 return ParseDirectiveHSACodeObjectVersion(); 5444 5445 if (IDVal == ".hsa_code_object_isa") 5446 return ParseDirectiveHSACodeObjectISA(); 5447 5448 if (IDVal == ".amd_kernel_code_t") 5449 return ParseDirectiveAMDKernelCodeT(); 5450 5451 if (IDVal == ".amdgpu_hsa_kernel") 5452 return ParseDirectiveAMDGPUHsaKernel(); 5453 5454 if (IDVal == ".amd_amdgpu_isa") 5455 return ParseDirectiveISAVersion(); 5456 5457 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5458 return ParseDirectiveHSAMetadata(); 5459 } 5460 5461 if (IDVal == ".amdgcn_target") 5462 return ParseDirectiveAMDGCNTarget(); 5463 5464 if (IDVal == ".amdgpu_lds") 5465 return ParseDirectiveAMDGPULDS(); 5466 5467 if (IDVal == PALMD::AssemblerDirectiveBegin) 5468 return ParseDirectivePALMetadataBegin(); 5469 5470 if (IDVal == PALMD::AssemblerDirective) 5471 return ParseDirectivePALMetadata(); 5472 5473 return true; 5474 } 5475 5476 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5477 unsigned RegNo) { 5478 5479 if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo)) 5480 return isGFX9Plus(); 5481 5482 // GFX10 has 2 more SGPRs 104 and 105. 5483 if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo)) 5484 return hasSGPR104_SGPR105(); 5485 5486 switch (RegNo) { 5487 case AMDGPU::SRC_SHARED_BASE: 5488 case AMDGPU::SRC_SHARED_LIMIT: 5489 case AMDGPU::SRC_PRIVATE_BASE: 5490 case AMDGPU::SRC_PRIVATE_LIMIT: 5491 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5492 return isGFX9Plus(); 5493 case AMDGPU::TBA: 5494 case AMDGPU::TBA_LO: 5495 case AMDGPU::TBA_HI: 5496 case AMDGPU::TMA: 5497 case AMDGPU::TMA_LO: 5498 case AMDGPU::TMA_HI: 5499 return !isGFX9Plus(); 5500 case AMDGPU::XNACK_MASK: 5501 case AMDGPU::XNACK_MASK_LO: 5502 case AMDGPU::XNACK_MASK_HI: 5503 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5504 case AMDGPU::SGPR_NULL: 5505 return isGFX10Plus(); 5506 default: 5507 break; 5508 } 5509 5510 if (isCI()) 5511 return true; 5512 5513 if (isSI() || isGFX10Plus()) { 5514 // No flat_scr on SI. 5515 // On GFX10 flat scratch is not a valid register operand and can only be 5516 // accessed with s_setreg/s_getreg.
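  // For example (illustrative), 's_mov_b32 flat_scratch_lo, s0' is accepted on
  // VI/GFX9 but rejected here for GFX10, where the HW_REG_FLAT_SCR_LO/HI hwreg
  // IDs are used with s_setreg_b32/s_getreg_b32 instead.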
5517 switch (RegNo) { 5518 case AMDGPU::FLAT_SCR: 5519 case AMDGPU::FLAT_SCR_LO: 5520 case AMDGPU::FLAT_SCR_HI: 5521 return false; 5522 default: 5523 return true; 5524 } 5525 } 5526 5527 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5528 // SI/CI have. 5529 if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo)) 5530 return hasSGPR102_SGPR103(); 5531 5532 return true; 5533 } 5534 5535 OperandMatchResultTy 5536 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5537 OperandMode Mode) { 5538 // Try to parse with a custom parser 5539 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5540 5541 // If we successfully parsed the operand or if there was an error parsing, 5542 // we are done. 5543 // 5544 // If we are parsing after we reach EndOfStatement then this means we 5545 // are appending default values to the Operands list. This is only done 5546 // by a custom parser, so we shouldn't continue on to the generic parsing. 5547 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5548 isToken(AsmToken::EndOfStatement)) 5549 return ResTy; 5550 5551 SMLoc RBraceLoc; 5552 SMLoc LBraceLoc = getLoc(); 5553 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5554 unsigned Prefix = Operands.size(); 5555 5556 for (;;) { 5557 auto Loc = getLoc(); 5558 ResTy = parseReg(Operands); 5559 if (ResTy == MatchOperand_NoMatch) 5560 Error(Loc, "expected a register"); 5561 if (ResTy != MatchOperand_Success) 5562 return MatchOperand_ParseFail; 5563 5564 RBraceLoc = getLoc(); 5565 if (trySkipToken(AsmToken::RBrac)) 5566 break; 5567 5568 if (!skipToken(AsmToken::Comma, 5569 "expected a comma or a closing square bracket")) { 5570 return MatchOperand_ParseFail; 5571 } 5572 } 5573 5574 if (Operands.size() - Prefix > 1) { 5575 Operands.insert(Operands.begin() + Prefix, 5576 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5577 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5578 } 5579 5580 return MatchOperand_Success; 5581 } 5582 5583 return parseRegOrImm(Operands); 5584 } 5585 5586 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5587 // Clear any forced encodings from the previous instruction.
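  // The suffixes recognized below re-establish one for the current mnemonic,
  // e.g. 'v_add_f32_e64' forces the 64-bit VOP3 encoding, 'v_add_f32_e32' the
  // 32-bit encoding, and the '_dpp'/'_sdwa' suffixes select the DPP and SDWA
  // variants.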
5588 setForcedEncodingSize(0); 5589 setForcedDPP(false); 5590 setForcedSDWA(false); 5591 5592 if (Name.endswith("_e64")) { 5593 setForcedEncodingSize(64); 5594 return Name.substr(0, Name.size() - 4); 5595 } else if (Name.endswith("_e32")) { 5596 setForcedEncodingSize(32); 5597 return Name.substr(0, Name.size() - 4); 5598 } else if (Name.endswith("_dpp")) { 5599 setForcedDPP(true); 5600 return Name.substr(0, Name.size() - 4); 5601 } else if (Name.endswith("_sdwa")) { 5602 setForcedSDWA(true); 5603 return Name.substr(0, Name.size() - 5); 5604 } 5605 return Name; 5606 } 5607 5608 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5609 StringRef Name, 5610 SMLoc NameLoc, OperandVector &Operands) { 5611 // Add the instruction mnemonic 5612 Name = parseMnemonicSuffix(Name); 5613 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5614 5615 bool IsMIMG = Name.startswith("image_"); 5616 5617 while (!trySkipToken(AsmToken::EndOfStatement)) { 5618 OperandMode Mode = OperandMode_Default; 5619 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5620 Mode = OperandMode_NSA; 5621 CPolSeen = 0; 5622 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5623 5624 if (Res != MatchOperand_Success) { 5625 checkUnsupportedInstruction(Name, NameLoc); 5626 if (!Parser.hasPendingError()) { 5627 // FIXME: use real operand location rather than the current location. 5628 StringRef Msg = 5629 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5630 "not a valid operand."; 5631 Error(getLoc(), Msg); 5632 } 5633 while (!trySkipToken(AsmToken::EndOfStatement)) { 5634 lex(); 5635 } 5636 return true; 5637 } 5638 5639 // Eat the comma or space if there is one. 5640 trySkipToken(AsmToken::Comma); 5641 } 5642 5643 return false; 5644 } 5645 5646 //===----------------------------------------------------------------------===// 5647 // Utility functions 5648 //===----------------------------------------------------------------------===// 5649 5650 OperandMatchResultTy 5651 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5652 5653 if (!trySkipId(Prefix, AsmToken::Colon)) 5654 return MatchOperand_NoMatch; 5655 5656 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5657 } 5658 5659 OperandMatchResultTy 5660 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5661 AMDGPUOperand::ImmTy ImmTy, 5662 bool (*ConvertResult)(int64_t&)) { 5663 SMLoc S = getLoc(); 5664 int64_t Value = 0; 5665 5666 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5667 if (Res != MatchOperand_Success) 5668 return Res; 5669 5670 if (ConvertResult && !ConvertResult(Value)) { 5671 Error(S, "invalid " + StringRef(Prefix) + " value."); 5672 } 5673 5674 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5675 return MatchOperand_Success; 5676 } 5677 5678 OperandMatchResultTy 5679 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5680 OperandVector &Operands, 5681 AMDGPUOperand::ImmTy ImmTy, 5682 bool (*ConvertResult)(int64_t&)) { 5683 SMLoc S = getLoc(); 5684 if (!trySkipId(Prefix, AsmToken::Colon)) 5685 return MatchOperand_NoMatch; 5686 5687 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5688 return MatchOperand_ParseFail; 5689 5690 unsigned Val = 0; 5691 const unsigned MaxSize = 4; 5692 5693 // FIXME: How to verify the number of elements matches the number of src 5694 // operands? 
5695 for (int I = 0; ; ++I) { 5696 int64_t Op; 5697 SMLoc Loc = getLoc(); 5698 if (!parseExpr(Op)) 5699 return MatchOperand_ParseFail; 5700 5701 if (Op != 0 && Op != 1) { 5702 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5703 return MatchOperand_ParseFail; 5704 } 5705 5706 Val |= (Op << I); 5707 5708 if (trySkipToken(AsmToken::RBrac)) 5709 break; 5710 5711 if (I + 1 == MaxSize) { 5712 Error(getLoc(), "expected a closing square bracket"); 5713 return MatchOperand_ParseFail; 5714 } 5715 5716 if (!skipToken(AsmToken::Comma, "expected a comma")) 5717 return MatchOperand_ParseFail; 5718 } 5719 5720 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5721 return MatchOperand_Success; 5722 } 5723 5724 OperandMatchResultTy 5725 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5726 AMDGPUOperand::ImmTy ImmTy) { 5727 int64_t Bit; 5728 SMLoc S = getLoc(); 5729 5730 if (trySkipId(Name)) { 5731 Bit = 1; 5732 } else if (trySkipId("no", Name)) { 5733 Bit = 0; 5734 } else { 5735 return MatchOperand_NoMatch; 5736 } 5737 5738 if (Name == "r128" && !hasMIMG_R128()) { 5739 Error(S, "r128 modifier is not supported on this GPU"); 5740 return MatchOperand_ParseFail; 5741 } 5742 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5743 Error(S, "a16 modifier is not supported on this GPU"); 5744 return MatchOperand_ParseFail; 5745 } 5746 5747 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5748 ImmTy = AMDGPUOperand::ImmTyR128A16; 5749 5750 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5751 return MatchOperand_Success; 5752 } 5753 5754 OperandMatchResultTy 5755 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5756 unsigned CPolOn = 0; 5757 unsigned CPolOff = 0; 5758 SMLoc S = getLoc(); 5759 5760 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5761 if (isGFX940() && !Mnemo.startswith("s_")) { 5762 if (trySkipId("sc0")) 5763 CPolOn = AMDGPU::CPol::SC0; 5764 else if (trySkipId("nosc0")) 5765 CPolOff = AMDGPU::CPol::SC0; 5766 else if (trySkipId("nt")) 5767 CPolOn = AMDGPU::CPol::NT; 5768 else if (trySkipId("nont")) 5769 CPolOff = AMDGPU::CPol::NT; 5770 else if (trySkipId("sc1")) 5771 CPolOn = AMDGPU::CPol::SC1; 5772 else if (trySkipId("nosc1")) 5773 CPolOff = AMDGPU::CPol::SC1; 5774 else 5775 return MatchOperand_NoMatch; 5776 } 5777 else if (trySkipId("glc")) 5778 CPolOn = AMDGPU::CPol::GLC; 5779 else if (trySkipId("noglc")) 5780 CPolOff = AMDGPU::CPol::GLC; 5781 else if (trySkipId("slc")) 5782 CPolOn = AMDGPU::CPol::SLC; 5783 else if (trySkipId("noslc")) 5784 CPolOff = AMDGPU::CPol::SLC; 5785 else if (trySkipId("dlc")) 5786 CPolOn = AMDGPU::CPol::DLC; 5787 else if (trySkipId("nodlc")) 5788 CPolOff = AMDGPU::CPol::DLC; 5789 else if (trySkipId("scc")) 5790 CPolOn = AMDGPU::CPol::SCC; 5791 else if (trySkipId("noscc")) 5792 CPolOff = AMDGPU::CPol::SCC; 5793 else 5794 return MatchOperand_NoMatch; 5795 5796 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5797 Error(S, "dlc modifier is not supported on this GPU"); 5798 return MatchOperand_ParseFail; 5799 } 5800 5801 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5802 Error(S, "scc modifier is not supported on this GPU"); 5803 return MatchOperand_ParseFail; 5804 } 5805 5806 if (CPolSeen & (CPolOn | CPolOff)) { 5807 Error(S, "duplicate cache policy modifier"); 5808 return MatchOperand_ParseFail; 5809 } 5810 5811 CPolSeen |= (CPolOn | CPolOff); 5812 5813 for (unsigned I = 1; I != Operands.size(); ++I) { 5814 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5815 if (Op.isCPol()) { 5816 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5817 return MatchOperand_Success; 5818 } 5819 } 5820 5821 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5822 AMDGPUOperand::ImmTyCPol)); 5823 5824 return MatchOperand_Success; 5825 } 5826 5827 static void addOptionalImmOperand( 5828 MCInst& Inst, const OperandVector& Operands, 5829 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5830 AMDGPUOperand::ImmTy ImmT, 5831 int64_t Default = 0) { 5832 auto i = OptionalIdx.find(ImmT); 5833 if (i != OptionalIdx.end()) { 5834 unsigned Idx = i->second; 5835 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5836 } else { 5837 Inst.addOperand(MCOperand::createImm(Default)); 5838 } 5839 } 5840 5841 OperandMatchResultTy 5842 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5843 StringRef &Value, 5844 SMLoc &StringLoc) { 5845 if (!trySkipId(Prefix, AsmToken::Colon)) 5846 return MatchOperand_NoMatch; 5847 5848 StringLoc = getLoc(); 5849 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5850 : MatchOperand_ParseFail; 5851 } 5852 5853 //===----------------------------------------------------------------------===// 5854 // MTBUF format 5855 //===----------------------------------------------------------------------===// 5856 5857 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5858 int64_t MaxVal, 5859 int64_t &Fmt) { 5860 int64_t Val; 5861 SMLoc Loc = getLoc(); 5862 5863 auto Res = parseIntWithPrefix(Pref, Val); 5864 if (Res == MatchOperand_ParseFail) 5865 return false; 5866 if (Res == MatchOperand_NoMatch) 5867 return true; 5868 5869 if (Val < 0 || Val > MaxVal) { 5870 Error(Loc, Twine("out of range ", StringRef(Pref))); 5871 return false; 5872 } 5873 5874 Fmt = Val; 5875 return true; 5876 } 5877 5878 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5879 // values to live in a joint format operand in the MCInst encoding. 5880 OperandMatchResultTy 5881 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5882 using namespace llvm::AMDGPU::MTBUFFormat; 5883 5884 int64_t Dfmt = DFMT_UNDEF; 5885 int64_t Nfmt = NFMT_UNDEF; 5886 5887 // dfmt and nfmt can appear in either order, and each is optional. 5888 for (int I = 0; I < 2; ++I) { 5889 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5890 return MatchOperand_ParseFail; 5891 5892 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5893 return MatchOperand_ParseFail; 5894 } 5895 // Skip optional comma between dfmt/nfmt 5896 // but guard against 2 commas following each other. 5897 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5898 !peekToken().is(AsmToken::Comma)) { 5899 trySkipToken(AsmToken::Comma); 5900 } 5901 } 5902 5903 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5904 return MatchOperand_NoMatch; 5905 5906 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5907 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5908 5909 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5910 return MatchOperand_Success; 5911 } 5912 5913 OperandMatchResultTy 5914 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5915 using namespace llvm::AMDGPU::MTBUFFormat; 5916 5917 int64_t Fmt = UFMT_UNDEF; 5918 5919 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5920 return MatchOperand_ParseFail; 5921 5922 if (Fmt == UFMT_UNDEF) 5923 return MatchOperand_NoMatch; 5924 5925 Format = Fmt; 5926 return MatchOperand_Success; 5927 } 5928 5929 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5930 int64_t &Nfmt, 5931 StringRef FormatStr, 5932 SMLoc Loc) { 5933 using namespace llvm::AMDGPU::MTBUFFormat; 5934 int64_t Format; 5935 5936 Format = getDfmt(FormatStr); 5937 if (Format != DFMT_UNDEF) { 5938 Dfmt = Format; 5939 return true; 5940 } 5941 5942 Format = getNfmt(FormatStr, getSTI()); 5943 if (Format != NFMT_UNDEF) { 5944 Nfmt = Format; 5945 return true; 5946 } 5947 5948 Error(Loc, "unsupported format"); 5949 return false; 5950 } 5951 5952 OperandMatchResultTy 5953 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5954 SMLoc FormatLoc, 5955 int64_t &Format) { 5956 using namespace llvm::AMDGPU::MTBUFFormat; 5957 5958 int64_t Dfmt = DFMT_UNDEF; 5959 int64_t Nfmt = NFMT_UNDEF; 5960 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5961 return MatchOperand_ParseFail; 5962 5963 if (trySkipToken(AsmToken::Comma)) { 5964 StringRef Str; 5965 SMLoc Loc = getLoc(); 5966 if (!parseId(Str, "expected a format string") || 5967 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5968 return MatchOperand_ParseFail; 5969 } 5970 if (Dfmt == DFMT_UNDEF) { 5971 Error(Loc, "duplicate numeric format"); 5972 return MatchOperand_ParseFail; 5973 } else if (Nfmt == NFMT_UNDEF) { 5974 Error(Loc, "duplicate data format"); 5975 return MatchOperand_ParseFail; 5976 } 5977 } 5978 5979 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5980 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5981 5982 if (isGFX10Plus()) { 5983 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5984 if (Ufmt == UFMT_UNDEF) { 5985 Error(FormatLoc, "unsupported format"); 5986 return MatchOperand_ParseFail; 5987 } 5988 Format = Ufmt; 5989 } else { 5990 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5991 } 5992 5993 return MatchOperand_Success; 5994 } 5995 5996 OperandMatchResultTy 5997 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5998 SMLoc Loc, 5999 int64_t &Format) { 6000 using namespace llvm::AMDGPU::MTBUFFormat; 6001 6002 auto Id = getUnifiedFormat(FormatStr); 6003 if (Id == UFMT_UNDEF) 6004 return MatchOperand_NoMatch; 6005 6006 if (!isGFX10Plus()) { 6007 Error(Loc, "unified format is not supported on this GPU"); 6008 return MatchOperand_ParseFail; 6009 } 6010 6011 Format = Id; 6012 return MatchOperand_Success; 6013 } 6014 6015 OperandMatchResultTy 6016 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6017 using namespace llvm::AMDGPU::MTBUFFormat; 6018 SMLoc Loc = getLoc(); 6019 6020 if (!parseExpr(Format)) 6021 return MatchOperand_ParseFail; 6022 if (!isValidFormatEncoding(Format, getSTI())) { 6023 Error(Loc, "out of range format"); 6024 return MatchOperand_ParseFail; 6025 } 6026 6027 return MatchOperand_Success; 6028 } 6029 6030 OperandMatchResultTy 6031 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6032 using namespace llvm::AMDGPU::MTBUFFormat; 6033 6034 if (!trySkipId("format", AsmToken::Colon)) 6035 return MatchOperand_NoMatch; 6036 6037 if (trySkipToken(AsmToken::LBrac)) { 6038 StringRef FormatStr; 6039 SMLoc Loc = getLoc(); 6040 if (!parseId(FormatStr, "expected a format string")) 6041 return MatchOperand_ParseFail; 6042 6043 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6044 if (Res == MatchOperand_NoMatch) 6045 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6046 if (Res != MatchOperand_Success) 6047 return Res; 6048 6049 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6050 return MatchOperand_ParseFail; 6051 6052 return MatchOperand_Success; 6053 } 6054 6055 return parseNumericFormat(Format); 6056 } 6057 6058 OperandMatchResultTy 6059 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6060 using namespace llvm::AMDGPU::MTBUFFormat; 6061 6062 int64_t Format = getDefaultFormatEncoding(getSTI()); 6063 OperandMatchResultTy Res; 6064 SMLoc Loc = getLoc(); 6065 6066 // Parse legacy format syntax. 6067 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6068 if (Res == MatchOperand_ParseFail) 6069 return Res; 6070 6071 bool FormatFound = (Res == MatchOperand_Success); 6072 6073 Operands.push_back( 6074 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6075 6076 if (FormatFound) 6077 trySkipToken(AsmToken::Comma); 6078 6079 if (isToken(AsmToken::EndOfStatement)) { 6080 // We are expecting an soffset operand, 6081 // but let matcher handle the error. 6082 return MatchOperand_Success; 6083 } 6084 6085 // Parse soffset. 
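  // Note that the symbolic format syntax follows soffset, e.g. (illustrative):
  //   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]
  // while the legacy dfmt:/nfmt: modifiers parsed above precede it.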
6086 Res = parseRegOrImm(Operands); 6087 if (Res != MatchOperand_Success) 6088 return Res; 6089 6090 trySkipToken(AsmToken::Comma); 6091 6092 if (!FormatFound) { 6093 Res = parseSymbolicOrNumericFormat(Format); 6094 if (Res == MatchOperand_ParseFail) 6095 return Res; 6096 if (Res == MatchOperand_Success) { 6097 auto Size = Operands.size(); 6098 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6099 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6100 Op.setImm(Format); 6101 } 6102 return MatchOperand_Success; 6103 } 6104 6105 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6106 Error(getLoc(), "duplicate format"); 6107 return MatchOperand_ParseFail; 6108 } 6109 return MatchOperand_Success; 6110 } 6111 6112 //===----------------------------------------------------------------------===// 6113 // ds 6114 //===----------------------------------------------------------------------===// 6115 6116 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6117 const OperandVector &Operands) { 6118 OptionalImmIndexMap OptionalIdx; 6119 6120 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6121 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6122 6123 // Add the register arguments 6124 if (Op.isReg()) { 6125 Op.addRegOperands(Inst, 1); 6126 continue; 6127 } 6128 6129 // Handle optional arguments 6130 OptionalIdx[Op.getImmTy()] = i; 6131 } 6132 6133 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6134 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6136 6137 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6138 } 6139 6140 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6141 bool IsGdsHardcoded) { 6142 OptionalImmIndexMap OptionalIdx; 6143 6144 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6145 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6146 6147 // Add the register arguments 6148 if (Op.isReg()) { 6149 Op.addRegOperands(Inst, 1); 6150 continue; 6151 } 6152 6153 if (Op.isToken() && Op.getToken() == "gds") { 6154 IsGdsHardcoded = true; 6155 continue; 6156 } 6157 6158 // Handle optional arguments 6159 OptionalIdx[Op.getImmTy()] = i; 6160 } 6161 6162 AMDGPUOperand::ImmTy OffsetType = 6163 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6164 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6165 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6166 AMDGPUOperand::ImmTyOffset; 6167 6168 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6169 6170 if (!IsGdsHardcoded) { 6171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6172 } 6173 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6174 } 6175 6176 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6177 OptionalImmIndexMap OptionalIdx; 6178 6179 unsigned OperandIdx[4]; 6180 unsigned EnMask = 0; 6181 int SrcIdx = 0; 6182 6183 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6184 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6185 6186 // Add the register arguments 6187 if (Op.isReg()) { 6188 assert(SrcIdx < 4); 6189 OperandIdx[SrcIdx] = Inst.size(); 6190 Op.addRegOperands(Inst, 1); 6191 ++SrcIdx; 6192 continue; 6193 } 6194 6195 if (Op.isOff()) { 6196 assert(SrcIdx < 4); 6197 OperandIdx[SrcIdx] = Inst.size(); 6198 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6199 ++SrcIdx; 6200 continue; 6201 } 6202 6203 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6204 Op.addImmOperands(Inst, 1); 6205 continue; 6206 } 6207 6208 if (Op.isToken() && Op.getToken() == "done") 6209 continue; 6210 6211 // Handle optional arguments 6212 OptionalIdx[Op.getImmTy()] = i; 6213 } 6214 6215 assert(SrcIdx == 4); 6216 6217 bool Compr = false; 6218 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6219 Compr = true; 6220 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6221 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6222 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6223 } 6224 6225 for (auto i = 0; i < SrcIdx; ++i) { 6226 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6227 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6228 } 6229 } 6230 6231 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6232 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6233 6234 Inst.addOperand(MCOperand::createImm(EnMask)); 6235 } 6236 6237 //===----------------------------------------------------------------------===// 6238 // s_waitcnt 6239 //===----------------------------------------------------------------------===// 6240 6241 static bool 6242 encodeCnt( 6243 const AMDGPU::IsaVersion ISA, 6244 int64_t &IntVal, 6245 int64_t CntVal, 6246 bool Saturate, 6247 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6248 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6249 { 6250 bool Failed = false; 6251 6252 IntVal = encode(ISA, IntVal, CntVal); 6253 if (CntVal != decode(ISA, IntVal)) { 6254 if (Saturate) { 6255 IntVal = encode(ISA, IntVal, -1); 6256 } else { 6257 Failed = true; 6258 } 6259 } 6260 return Failed; 6261 } 6262 6263 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6264 6265 SMLoc CntLoc = getLoc(); 6266 StringRef CntName = getTokenStr(); 6267 6268 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6269 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6270 return false; 6271 6272 int64_t CntVal; 6273 SMLoc ValLoc = getLoc(); 6274 if (!parseExpr(CntVal)) 6275 return false; 6276 6277 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6278 6279 bool Failed = true; 6280 bool Sat = CntName.endswith("_sat"); 6281 6282 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6283 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6284 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6285 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6286 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6287 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6288 } else { 6289 Error(CntLoc, "invalid counter name " + CntName); 6290 return false; 6291 } 6292 6293 if (Failed) { 6294 Error(ValLoc, "too large value for " + CntName); 6295 return false; 6296 } 6297 6298 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6299 return false; 6300 6301 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6302 if (isToken(AsmToken::EndOfStatement)) { 6303 Error(getLoc(), "expected a counter name"); 6304 return false; 6305 } 6306 } 6307 6308 return true; 6309 } 6310 6311 OperandMatchResultTy 6312 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6313 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6314 int64_t Waitcnt = getWaitcntBitMask(ISA); 6315 SMLoc S = getLoc(); 6316 6317 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6318 while (!isToken(AsmToken::EndOfStatement)) { 6319 if (!parseCnt(Waitcnt)) 6320 return MatchOperand_ParseFail; 6321 } 6322 } else { 6323 if (!parseExpr(Waitcnt)) 6324 return MatchOperand_ParseFail; 6325 } 6326 6327 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6328 return MatchOperand_Success; 6329 } 6330 6331 bool 6332 AMDGPUOperand::isSWaitCnt() const { 6333 return isImm(); 6334 } 6335 6336 //===----------------------------------------------------------------------===// 6337 // hwreg 6338 //===----------------------------------------------------------------------===// 6339 6340 bool 6341 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6342 OperandInfoTy &Offset, 6343 
OperandInfoTy &Width) { 6344 using namespace llvm::AMDGPU::Hwreg; 6345 6346 // The register may be specified by name or using a numeric code 6347 HwReg.Loc = getLoc(); 6348 if (isToken(AsmToken::Identifier) && 6349 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6350 HwReg.IsSymbolic = true; 6351 lex(); // skip register name 6352 } else if (!parseExpr(HwReg.Id, "a register name")) { 6353 return false; 6354 } 6355 6356 if (trySkipToken(AsmToken::RParen)) 6357 return true; 6358 6359 // parse optional params 6360 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6361 return false; 6362 6363 Offset.Loc = getLoc(); 6364 if (!parseExpr(Offset.Id)) 6365 return false; 6366 6367 if (!skipToken(AsmToken::Comma, "expected a comma")) 6368 return false; 6369 6370 Width.Loc = getLoc(); 6371 return parseExpr(Width.Id) && 6372 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6373 } 6374 6375 bool 6376 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6377 const OperandInfoTy &Offset, 6378 const OperandInfoTy &Width) { 6379 6380 using namespace llvm::AMDGPU::Hwreg; 6381 6382 if (HwReg.IsSymbolic) { 6383 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6384 Error(HwReg.Loc, 6385 "specified hardware register is not supported on this GPU"); 6386 return false; 6387 } 6388 } else { 6389 if (!isValidHwreg(HwReg.Id)) { 6390 Error(HwReg.Loc, 6391 "invalid code of hardware register: only 6-bit values are legal"); 6392 return false; 6393 } 6394 } 6395 if (!isValidHwregOffset(Offset.Id)) { 6396 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6397 return false; 6398 } 6399 if (!isValidHwregWidth(Width.Id)) { 6400 Error(Width.Loc, 6401 "invalid bitfield width: only values from 1 to 32 are legal"); 6402 return false; 6403 } 6404 return true; 6405 } 6406 6407 OperandMatchResultTy 6408 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6409 using namespace llvm::AMDGPU::Hwreg; 6410 6411 int64_t ImmVal = 0; 6412 SMLoc Loc = getLoc(); 6413 6414 if (trySkipId("hwreg", AsmToken::LParen)) { 6415 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6416 OperandInfoTy Offset(OFFSET_DEFAULT_); 6417 OperandInfoTy Width(WIDTH_DEFAULT_); 6418 if (parseHwregBody(HwReg, Offset, Width) && 6419 validateHwreg(HwReg, Offset, Width)) { 6420 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6421 } else { 6422 return MatchOperand_ParseFail; 6423 } 6424 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6425 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6426 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6427 return MatchOperand_ParseFail; 6428 } 6429 } else { 6430 return MatchOperand_ParseFail; 6431 } 6432 6433 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6434 return MatchOperand_Success; 6435 } 6436 6437 bool AMDGPUOperand::isHwreg() const { 6438 return isImmTy(ImmTyHwreg); 6439 } 6440 6441 //===----------------------------------------------------------------------===// 6442 // sendmsg 6443 //===----------------------------------------------------------------------===// 6444 6445 bool 6446 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6447 OperandInfoTy &Op, 6448 OperandInfoTy &Stream) { 6449 using namespace llvm::AMDGPU::SendMsg; 6450 6451 Msg.Loc = getLoc(); 6452 if (isToken(AsmToken::Identifier) && 6453 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6454 Msg.IsSymbolic = true; 6455 lex(); // skip message name 6456 } else if (!parseExpr(Msg.Id, "a message name")) { 6457 return false; 
6458 } 6459 6460 if (trySkipToken(AsmToken::Comma)) { 6461 Op.IsDefined = true; 6462 Op.Loc = getLoc(); 6463 if (isToken(AsmToken::Identifier) && 6464 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6465 lex(); // skip operation name 6466 } else if (!parseExpr(Op.Id, "an operation name")) { 6467 return false; 6468 } 6469 6470 if (trySkipToken(AsmToken::Comma)) { 6471 Stream.IsDefined = true; 6472 Stream.Loc = getLoc(); 6473 if (!parseExpr(Stream.Id)) 6474 return false; 6475 } 6476 } 6477 6478 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6479 } 6480 6481 bool 6482 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6483 const OperandInfoTy &Op, 6484 const OperandInfoTy &Stream) { 6485 using namespace llvm::AMDGPU::SendMsg; 6486 6487 // Validation strictness depends on whether message is specified 6488 // in a symbolic or in a numeric form. In the latter case 6489 // only encoding possibility is checked. 6490 bool Strict = Msg.IsSymbolic; 6491 6492 if (Strict) { 6493 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6494 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6495 return false; 6496 } 6497 } else { 6498 if (!isValidMsgId(Msg.Id)) { 6499 Error(Msg.Loc, "invalid message id"); 6500 return false; 6501 } 6502 } 6503 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6504 if (Op.IsDefined) { 6505 Error(Op.Loc, "message does not support operations"); 6506 } else { 6507 Error(Msg.Loc, "missing message operation"); 6508 } 6509 return false; 6510 } 6511 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6512 Error(Op.Loc, "invalid operation id"); 6513 return false; 6514 } 6515 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6516 Error(Stream.Loc, "message operation does not support streams"); 6517 return false; 6518 } 6519 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6520 Error(Stream.Loc, "invalid message stream id"); 6521 return false; 6522 } 6523 return true; 6524 } 6525 6526 OperandMatchResultTy 6527 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6528 using namespace llvm::AMDGPU::SendMsg; 6529 6530 int64_t ImmVal = 0; 6531 SMLoc Loc = getLoc(); 6532 6533 if (trySkipId("sendmsg", AsmToken::LParen)) { 6534 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6535 OperandInfoTy Op(OP_NONE_); 6536 OperandInfoTy Stream(STREAM_ID_NONE_); 6537 if (parseSendMsgBody(Msg, Op, Stream) && 6538 validateSendMsg(Msg, Op, Stream)) { 6539 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6540 } else { 6541 return MatchOperand_ParseFail; 6542 } 6543 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6544 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6545 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6546 return MatchOperand_ParseFail; 6547 } 6548 } else { 6549 return MatchOperand_ParseFail; 6550 } 6551 6552 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6553 return MatchOperand_Success; 6554 } 6555 6556 bool AMDGPUOperand::isSendMsg() const { 6557 return isImmTy(ImmTySendMsg); 6558 } 6559 6560 //===----------------------------------------------------------------------===// 6561 // v_interp 6562 //===----------------------------------------------------------------------===// 6563 6564 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6565 StringRef Str; 6566 SMLoc S = getLoc(); 6567 6568 if (!parseId(Str)) 6569 return MatchOperand_NoMatch; 6570 6571 int Slot = StringSwitch<int>(Str) 6572 .Case("p10", 0) 6573 .Case("p20", 1) 6574 
.Case("p0", 2) 6575 .Default(-1); 6576 6577 if (Slot == -1) { 6578 Error(S, "invalid interpolation slot"); 6579 return MatchOperand_ParseFail; 6580 } 6581 6582 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6583 AMDGPUOperand::ImmTyInterpSlot)); 6584 return MatchOperand_Success; 6585 } 6586 6587 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6588 StringRef Str; 6589 SMLoc S = getLoc(); 6590 6591 if (!parseId(Str)) 6592 return MatchOperand_NoMatch; 6593 6594 if (!Str.startswith("attr")) { 6595 Error(S, "invalid interpolation attribute"); 6596 return MatchOperand_ParseFail; 6597 } 6598 6599 StringRef Chan = Str.take_back(2); 6600 int AttrChan = StringSwitch<int>(Chan) 6601 .Case(".x", 0) 6602 .Case(".y", 1) 6603 .Case(".z", 2) 6604 .Case(".w", 3) 6605 .Default(-1); 6606 if (AttrChan == -1) { 6607 Error(S, "invalid or missing interpolation attribute channel"); 6608 return MatchOperand_ParseFail; 6609 } 6610 6611 Str = Str.drop_back(2).drop_front(4); 6612 6613 uint8_t Attr; 6614 if (Str.getAsInteger(10, Attr)) { 6615 Error(S, "invalid or missing interpolation attribute number"); 6616 return MatchOperand_ParseFail; 6617 } 6618 6619 if (Attr > 63) { 6620 Error(S, "out of bounds interpolation attribute number"); 6621 return MatchOperand_ParseFail; 6622 } 6623 6624 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6625 6626 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6627 AMDGPUOperand::ImmTyInterpAttr)); 6628 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6629 AMDGPUOperand::ImmTyAttrChan)); 6630 return MatchOperand_Success; 6631 } 6632 6633 //===----------------------------------------------------------------------===// 6634 // exp 6635 //===----------------------------------------------------------------------===// 6636 6637 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6638 using namespace llvm::AMDGPU::Exp; 6639 6640 StringRef Str; 6641 SMLoc S = getLoc(); 6642 6643 if (!parseId(Str)) 6644 return MatchOperand_NoMatch; 6645 6646 unsigned Id = getTgtId(Str); 6647 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6648 Error(S, (Id == ET_INVALID) ? 
6649 "invalid exp target" : 6650 "exp target is not supported on this GPU"); 6651 return MatchOperand_ParseFail; 6652 } 6653 6654 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6655 AMDGPUOperand::ImmTyExpTgt)); 6656 return MatchOperand_Success; 6657 } 6658 6659 //===----------------------------------------------------------------------===// 6660 // parser helpers 6661 //===----------------------------------------------------------------------===// 6662 6663 bool 6664 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6665 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6666 } 6667 6668 bool 6669 AMDGPUAsmParser::isId(const StringRef Id) const { 6670 return isId(getToken(), Id); 6671 } 6672 6673 bool 6674 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6675 return getTokenKind() == Kind; 6676 } 6677 6678 bool 6679 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6680 if (isId(Id)) { 6681 lex(); 6682 return true; 6683 } 6684 return false; 6685 } 6686 6687 bool 6688 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6689 if (isToken(AsmToken::Identifier)) { 6690 StringRef Tok = getTokenStr(); 6691 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6692 lex(); 6693 return true; 6694 } 6695 } 6696 return false; 6697 } 6698 6699 bool 6700 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6701 if (isId(Id) && peekToken().is(Kind)) { 6702 lex(); 6703 lex(); 6704 return true; 6705 } 6706 return false; 6707 } 6708 6709 bool 6710 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6711 if (isToken(Kind)) { 6712 lex(); 6713 return true; 6714 } 6715 return false; 6716 } 6717 6718 bool 6719 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6720 const StringRef ErrMsg) { 6721 if (!trySkipToken(Kind)) { 6722 Error(getLoc(), ErrMsg); 6723 return false; 6724 } 6725 return true; 6726 } 6727 6728 bool 6729 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6730 SMLoc S = getLoc(); 6731 6732 const MCExpr *Expr; 6733 if (Parser.parseExpression(Expr)) 6734 return false; 6735 6736 if (Expr->evaluateAsAbsolute(Imm)) 6737 return true; 6738 6739 if (Expected.empty()) { 6740 Error(S, "expected absolute expression"); 6741 } else { 6742 Error(S, Twine("expected ", Expected) + 6743 Twine(" or an absolute expression")); 6744 } 6745 return false; 6746 } 6747 6748 bool 6749 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6750 SMLoc S = getLoc(); 6751 6752 const MCExpr *Expr; 6753 if (Parser.parseExpression(Expr)) 6754 return false; 6755 6756 int64_t IntVal; 6757 if (Expr->evaluateAsAbsolute(IntVal)) { 6758 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6759 } else { 6760 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6761 } 6762 return true; 6763 } 6764 6765 bool 6766 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6767 if (isToken(AsmToken::String)) { 6768 Val = getToken().getStringContents(); 6769 lex(); 6770 return true; 6771 } else { 6772 Error(getLoc(), ErrMsg); 6773 return false; 6774 } 6775 } 6776 6777 bool 6778 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6779 if (isToken(AsmToken::Identifier)) { 6780 Val = getTokenStr(); 6781 lex(); 6782 return true; 6783 } else { 6784 if (!ErrMsg.empty()) 6785 Error(getLoc(), ErrMsg); 6786 return false; 6787 } 6788 } 6789 6790 AsmToken 6791 AMDGPUAsmParser::getToken() const { 6792 return Parser.getTok(); 6793 } 6794 6795 AsmToken 6796 
AMDGPUAsmParser::peekToken() { 6797 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6798 } 6799 6800 void 6801 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6802 auto TokCount = getLexer().peekTokens(Tokens); 6803 6804 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6805 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6806 } 6807 6808 AsmToken::TokenKind 6809 AMDGPUAsmParser::getTokenKind() const { 6810 return getLexer().getKind(); 6811 } 6812 6813 SMLoc 6814 AMDGPUAsmParser::getLoc() const { 6815 return getToken().getLoc(); 6816 } 6817 6818 StringRef 6819 AMDGPUAsmParser::getTokenStr() const { 6820 return getToken().getString(); 6821 } 6822 6823 void 6824 AMDGPUAsmParser::lex() { 6825 Parser.Lex(); 6826 } 6827 6828 SMLoc 6829 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6830 const OperandVector &Operands) const { 6831 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6832 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6833 if (Test(Op)) 6834 return Op.getStartLoc(); 6835 } 6836 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6837 } 6838 6839 SMLoc 6840 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6841 const OperandVector &Operands) const { 6842 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6843 return getOperandLoc(Test, Operands); 6844 } 6845 6846 SMLoc 6847 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6848 const OperandVector &Operands) const { 6849 auto Test = [=](const AMDGPUOperand& Op) { 6850 return Op.isRegKind() && Op.getReg() == Reg; 6851 }; 6852 return getOperandLoc(Test, Operands); 6853 } 6854 6855 SMLoc 6856 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6857 auto Test = [](const AMDGPUOperand& Op) { 6858 return Op.IsImmKindLiteral() || Op.isExpr(); 6859 }; 6860 return getOperandLoc(Test, Operands); 6861 } 6862 6863 SMLoc 6864 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6865 auto Test = [](const AMDGPUOperand& Op) { 6866 return Op.isImmKindConst(); 6867 }; 6868 return getOperandLoc(Test, Operands); 6869 } 6870 6871 //===----------------------------------------------------------------------===// 6872 // swizzle 6873 //===----------------------------------------------------------------------===// 6874 6875 LLVM_READNONE 6876 static unsigned 6877 encodeBitmaskPerm(const unsigned AndMask, 6878 const unsigned OrMask, 6879 const unsigned XorMask) { 6880 using namespace llvm::AMDGPU::Swizzle; 6881 6882 return BITMASK_PERM_ENC | 6883 (AndMask << BITMASK_AND_SHIFT) | 6884 (OrMask << BITMASK_OR_SHIFT) | 6885 (XorMask << BITMASK_XOR_SHIFT); 6886 } 6887 6888 bool 6889 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6890 const unsigned MinVal, 6891 const unsigned MaxVal, 6892 const StringRef ErrMsg, 6893 SMLoc &Loc) { 6894 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6895 return false; 6896 } 6897 Loc = getLoc(); 6898 if (!parseExpr(Op)) { 6899 return false; 6900 } 6901 if (Op < MinVal || Op > MaxVal) { 6902 Error(Loc, ErrMsg); 6903 return false; 6904 } 6905 6906 return true; 6907 } 6908 6909 bool 6910 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6911 const unsigned MinVal, 6912 const unsigned MaxVal, 6913 const StringRef ErrMsg) { 6914 SMLoc Loc; 6915 for (unsigned i = 0; i < OpNum; ++i) { 6916 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6917 return false; 6918 } 6919 6920 return true; 6921 } 6922 6923 bool 6924 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6925 using namespace llvm::AMDGPU::Swizzle; 6926 6927 int64_t Lane[LANE_NUM]; 6928 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6929 "expected a 2-bit lane id")) { 6930 Imm = QUAD_PERM_ENC; 6931 for (unsigned I = 0; I < LANE_NUM; ++I) { 6932 Imm |= Lane[I] << (LANE_SHIFT * I); 6933 } 6934 return true; 6935 } 6936 return false; 6937 } 6938 6939 bool 6940 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6941 using namespace llvm::AMDGPU::Swizzle; 6942 6943 SMLoc Loc; 6944 int64_t GroupSize; 6945 int64_t LaneIdx; 6946 6947 if (!parseSwizzleOperand(GroupSize, 6948 2, 32, 6949 "group size must be in the interval [2,32]", 6950 Loc)) { 6951 return false; 6952 } 6953 if (!isPowerOf2_64(GroupSize)) { 6954 Error(Loc, "group size must be a power of two"); 6955 return false; 6956 } 6957 if (parseSwizzleOperand(LaneIdx, 6958 0, GroupSize - 1, 6959 "lane id must be in the interval [0,group size - 1]", 6960 Loc)) { 6961 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6962 return true; 6963 } 6964 return false; 6965 } 6966 6967 bool 6968 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6969 using namespace llvm::AMDGPU::Swizzle; 6970 6971 SMLoc Loc; 6972 int64_t GroupSize; 6973 6974 if (!parseSwizzleOperand(GroupSize, 6975 2, 32, 6976 "group size must be in the interval [2,32]", 6977 Loc)) { 6978 return false; 6979 } 6980 if (!isPowerOf2_64(GroupSize)) { 6981 Error(Loc, "group size must be a power of two"); 6982 return false; 6983 } 6984 6985 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6986 return true; 6987 } 6988 6989 bool 6990 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6991 using namespace llvm::AMDGPU::Swizzle; 6992 6993 SMLoc Loc; 6994 int64_t GroupSize; 6995 6996 if (!parseSwizzleOperand(GroupSize, 6997 1, 16, 6998 "group size must be in the interval [1,16]", 6999 Loc)) { 7000 return false; 7001 } 7002 if (!isPowerOf2_64(GroupSize)) { 7003 Error(Loc, "group size must be a power of two"); 7004 return false; 7005 } 7006 7007 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7008 return true; 7009 } 7010 7011 bool 7012 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7013 using namespace llvm::AMDGPU::Swizzle; 7014 7015 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7016 return false; 7017 } 7018 7019 StringRef Ctl; 7020 SMLoc StrLoc = getLoc(); 7021 if (!parseString(Ctl)) { 7022 return false; 7023 } 7024 if (Ctl.size() != BITMASK_WIDTH) { 7025 Error(StrLoc, "expected a 5-character mask"); 7026 return false; 7027 } 7028 7029 unsigned AndMask = 0; 7030 unsigned OrMask = 0; 7031 unsigned XorMask = 0; 7032 7033 for (size_t i = 0; i < Ctl.size(); ++i) { 7034 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 7035 switch(Ctl[i]) { 7036 default: 7037 Error(StrLoc, "invalid mask"); 7038 return false; 7039 case '0': 7040 break; 7041 case '1': 7042 OrMask |= Mask; 7043 break; 7044 case 'p': 7045 AndMask |= Mask; 7046 break; 7047 case 'i': 7048 AndMask |= Mask; 7049 XorMask |= Mask; 7050 break; 7051 } 7052 } 7053 7054 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 7055 return true; 7056 } 7057 7058 bool 7059 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 7060 7061 SMLoc OffsetLoc = getLoc(); 7062 7063 if (!parseExpr(Imm, "a swizzle macro")) { 7064 return false; 7065 } 7066 if (!isUInt<16>(Imm)) { 7067 Error(OffsetLoc, "expected a 16-bit offset"); 7068 return false; 7069 } 7070 return true; 7071 } 7072 7073 bool 7074 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 7075 using namespace llvm::AMDGPU::Swizzle; 7076 7077 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) { 7078 7079 SMLoc ModeLoc = getLoc(); 7080 bool Ok = false; 7081 7082 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 7083 Ok = parseSwizzleQuadPerm(Imm); 7084 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 7085 Ok = parseSwizzleBitmaskPerm(Imm); 7086 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 7087 Ok = parseSwizzleBroadcast(Imm); 7088 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 7089 Ok = parseSwizzleSwap(Imm); 7090 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 7091 Ok = parseSwizzleReverse(Imm); 7092 } else { 7093 Error(ModeLoc, "expected a swizzle mode"); 7094 } 7095 7096 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 7097 } 7098 7099 return false; 7100 } 7101 7102 OperandMatchResultTy 7103 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 7104 SMLoc S = getLoc(); 7105 int64_t Imm = 0; 7106 7107 if (trySkipId("offset")) { 7108 7109 bool Ok = false; 7110 if (skipToken(AsmToken::Colon, "expected a colon")) { 7111 if (trySkipId("swizzle")) { 7112 Ok = parseSwizzleMacro(Imm); 7113 } else { 7114 Ok = parseSwizzleOffset(Imm); 7115 } 7116 } 7117 7118 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 7119 7120 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 7121 } else { 7122 // Swizzle "offset" operand is optional. 7123 // If it is omitted, try parsing other optional operands. 7124 return parseOptionalOpr(Operands); 7125 } 7126 } 7127 7128 bool 7129 AMDGPUOperand::isSwizzle() const { 7130 return isImmTy(ImmTySwizzle); 7131 } 7132 7133 //===----------------------------------------------------------------------===// 7134 // VGPR Index Mode 7135 //===----------------------------------------------------------------------===// 7136 7137 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7138 7139 using namespace llvm::AMDGPU::VGPRIndexMode; 7140 7141 if (trySkipToken(AsmToken::RParen)) { 7142 return OFF; 7143 } 7144 7145 int64_t Imm = 0; 7146 7147 while (true) { 7148 unsigned Mode = 0; 7149 SMLoc S = getLoc(); 7150 7151 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7152 if (trySkipId(IdSymbolic[ModeId])) { 7153 Mode = 1 << ModeId; 7154 break; 7155 } 7156 } 7157 7158 if (Mode == 0) { 7159 Error(S, (Imm == 0)?
7160 "expected a VGPR index mode or a closing parenthesis" : 7161 "expected a VGPR index mode"); 7162 return UNDEF; 7163 } 7164 7165 if (Imm & Mode) { 7166 Error(S, "duplicate VGPR index mode"); 7167 return UNDEF; 7168 } 7169 Imm |= Mode; 7170 7171 if (trySkipToken(AsmToken::RParen)) 7172 break; 7173 if (!skipToken(AsmToken::Comma, 7174 "expected a comma or a closing parenthesis")) 7175 return UNDEF; 7176 } 7177 7178 return Imm; 7179 } 7180 7181 OperandMatchResultTy 7182 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7183 7184 using namespace llvm::AMDGPU::VGPRIndexMode; 7185 7186 int64_t Imm = 0; 7187 SMLoc S = getLoc(); 7188 7189 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7190 Imm = parseGPRIdxMacro(); 7191 if (Imm == UNDEF) 7192 return MatchOperand_ParseFail; 7193 } else { 7194 if (getParser().parseAbsoluteExpression(Imm)) 7195 return MatchOperand_ParseFail; 7196 if (Imm < 0 || !isUInt<4>(Imm)) { 7197 Error(S, "invalid immediate: only 4-bit values are legal"); 7198 return MatchOperand_ParseFail; 7199 } 7200 } 7201 7202 Operands.push_back( 7203 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7204 return MatchOperand_Success; 7205 } 7206 7207 bool AMDGPUOperand::isGPRIdxMode() const { 7208 return isImmTy(ImmTyGprIdxMode); 7209 } 7210 7211 //===----------------------------------------------------------------------===// 7212 // sopp branch targets 7213 //===----------------------------------------------------------------------===// 7214 7215 OperandMatchResultTy 7216 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7217 7218 // Make sure we are not parsing something 7219 // that looks like a label or an expression but is not. 7220 // This will improve error messages. 7221 if (isRegister() || isModifier()) 7222 return MatchOperand_NoMatch; 7223 7224 if (!parseExpr(Operands)) 7225 return MatchOperand_ParseFail; 7226 7227 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7228 assert(Opr.isImm() || Opr.isExpr()); 7229 SMLoc Loc = Opr.getStartLoc(); 7230 7231 // Currently we do not support arbitrary expressions as branch targets. 7232 // Only labels and absolute expressions are accepted. 
7233 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7234 Error(Loc, "expected an absolute expression or a label"); 7235 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7236 Error(Loc, "expected a 16-bit signed jump offset"); 7237 } 7238 7239 return MatchOperand_Success; 7240 } 7241 7242 //===----------------------------------------------------------------------===// 7243 // Boolean holding registers 7244 //===----------------------------------------------------------------------===// 7245 7246 OperandMatchResultTy 7247 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7248 return parseReg(Operands); 7249 } 7250 7251 //===----------------------------------------------------------------------===// 7252 // mubuf 7253 //===----------------------------------------------------------------------===// 7254 7255 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7256 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7257 } 7258 7259 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7260 const OperandVector &Operands, 7261 bool IsAtomic, 7262 bool IsLds) { 7263 bool IsLdsOpcode = IsLds; 7264 bool HasLdsModifier = false; 7265 OptionalImmIndexMap OptionalIdx; 7266 unsigned FirstOperandIdx = 1; 7267 bool IsAtomicReturn = false; 7268 7269 if (IsAtomic) { 7270 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7271 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7272 if (!Op.isCPol()) 7273 continue; 7274 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7275 break; 7276 } 7277 7278 if (!IsAtomicReturn) { 7279 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7280 if (NewOpc != -1) 7281 Inst.setOpcode(NewOpc); 7282 } 7283 7284 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7285 SIInstrFlags::IsAtomicRet; 7286 } 7287 7288 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7289 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7290 7291 // Add the register arguments 7292 if (Op.isReg()) { 7293 Op.addRegOperands(Inst, 1); 7294 // Insert a tied src for atomic return dst. 7295 // This cannot be postponed as subsequent calls to 7296 // addImmOperands rely on correct number of MC operands. 7297 if (IsAtomicReturn && i == FirstOperandIdx) 7298 Op.addRegOperands(Inst, 1); 7299 continue; 7300 } 7301 7302 // Handle the case where soffset is an immediate 7303 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7304 Op.addImmOperands(Inst, 1); 7305 continue; 7306 } 7307 7308 HasLdsModifier |= Op.isLDS(); 7309 7310 // Handle tokens like 'offen' which are sometimes hard-coded into the 7311 // asm string. There are no MCInst operands for these. 7312 if (Op.isToken()) { 7313 continue; 7314 } 7315 assert(Op.isImm()); 7316 7317 // Handle optional arguments 7318 OptionalIdx[Op.getImmTy()] = i; 7319 } 7320 7321 // This is a workaround for an llvm quirk which may result in an 7322 // incorrect instruction selection. Lds and non-lds versions of 7323 // MUBUF instructions are identical except that lds versions 7324 // have mandatory 'lds' modifier. However this modifier follows 7325 // optional modifiers and llvm asm matcher regards this 'lds' 7326 // modifier as an optional one. As a result, an lds version 7327 // of opcode may be selected even if it has no 'lds' modifier. 7328 if (IsLdsOpcode && !HasLdsModifier) { 7329 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7330 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7331 Inst.setOpcode(NoLdsOpcode); 7332 IsLdsOpcode = false; 7333 } 7334 } 7335 7336 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7337 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7338 7339 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7340 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7341 } 7342 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7343 } 7344 7345 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7346 OptionalImmIndexMap OptionalIdx; 7347 7348 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7349 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7350 7351 // Add the register arguments 7352 if (Op.isReg()) { 7353 Op.addRegOperands(Inst, 1); 7354 continue; 7355 } 7356 7357 // Handle the case where soffset is an immediate 7358 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7359 Op.addImmOperands(Inst, 1); 7360 continue; 7361 } 7362 7363 // Handle tokens like 'offen' which are sometimes hard-coded into the 7364 // asm string. There are no MCInst operands for these. 7365 if (Op.isToken()) { 7366 continue; 7367 } 7368 assert(Op.isImm()); 7369 7370 // Handle optional arguments 7371 OptionalIdx[Op.getImmTy()] = i; 7372 } 7373 7374 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7375 AMDGPUOperand::ImmTyOffset); 7376 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7377 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7378 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7379 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7380 } 7381 7382 //===----------------------------------------------------------------------===// 7383 // mimg 7384 //===----------------------------------------------------------------------===// 7385 7386 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7387 bool IsAtomic) { 7388 unsigned I = 1; 7389 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7390 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7391 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7392 } 7393 7394 if (IsAtomic) { 7395 // Add src, same as dst 7396 assert(Desc.getNumDefs() == 1); 7397 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7398 } 7399 7400 OptionalImmIndexMap OptionalIdx; 7401 7402 for (unsigned E = Operands.size(); I != E; ++I) { 7403 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7404 7405 // Add the register arguments 7406 if (Op.isReg()) { 7407 Op.addRegOperands(Inst, 1); 7408 } else if (Op.isImmModifier()) { 7409 OptionalIdx[Op.getImmTy()] = I; 7410 } else if (!Op.isToken()) { 7411 llvm_unreachable("unexpected operand type"); 7412 } 7413 } 7414 7415 bool IsGFX10Plus = isGFX10Plus(); 7416 7417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7418 if (IsGFX10Plus) 7419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7420 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7421 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7422 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7423 if (IsGFX10Plus) 7424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7425 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7428 if (!IsGFX10Plus) 7429 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7430 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7431 } 7432 7433 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7434 cvtMIMG(Inst, Operands, true); 7435 } 7436 7437 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7438 OptionalImmIndexMap OptionalIdx; 7439 bool IsAtomicReturn = false; 7440 7441 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7442 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7443 if (!Op.isCPol()) 7444 continue; 7445 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7446 break; 7447 } 7448 7449 if (!IsAtomicReturn) { 7450 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7451 if (NewOpc != -1) 7452 Inst.setOpcode(NewOpc); 7453 } 7454 7455 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7456 SIInstrFlags::IsAtomicRet; 7457 7458 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7459 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7460 7461 // Add the register arguments 7462 if (Op.isReg()) { 7463 Op.addRegOperands(Inst, 1); 7464 if (IsAtomicReturn && i == 1) 7465 Op.addRegOperands(Inst, 1); 7466 continue; 7467 } 7468 7469 // Handle the case where soffset is an immediate 7470 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7471 Op.addImmOperands(Inst, 1); 7472 continue; 7473 } 7474 7475 // Handle tokens like 'offen' which are sometimes hard-coded into the 7476 // asm string. There are no MCInst operands for these. 7477 if (Op.isToken()) { 7478 continue; 7479 } 7480 assert(Op.isImm()); 7481 7482 // Handle optional arguments 7483 OptionalIdx[Op.getImmTy()] = i; 7484 } 7485 7486 if ((int)Inst.getNumOperands() <= 7487 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7488 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7489 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7490 } 7491 7492 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7493 const OperandVector &Operands) { 7494 for (unsigned I = 1; I < Operands.size(); ++I) { 7495 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7496 if (Operand.isReg()) 7497 Operand.addRegOperands(Inst, 1); 7498 } 7499 7500 Inst.addOperand(MCOperand::createImm(1)); // a16 7501 } 7502 7503 //===----------------------------------------------------------------------===// 7504 // smrd 7505 //===----------------------------------------------------------------------===// 7506 7507 bool AMDGPUOperand::isSMRDOffset8() const { 7508 return isImm() && isUInt<8>(getImm()); 7509 } 7510 7511 bool AMDGPUOperand::isSMEMOffset() const { 7512 return isImm(); // Offset range is checked later by validator. 7513 } 7514 7515 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7516 // 32-bit literals are only supported on CI and we only want to use them 7517 // when the offset is > 8-bits. 
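  // In other words, accept only immediates that need more than 8 bits but
  // still fit in 32 bits.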
7518 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7519 } 7520 7521 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7522 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7523 } 7524 7525 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7526 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7527 } 7528 7529 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7530 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7531 } 7532 7533 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7534 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7535 } 7536 7537 //===----------------------------------------------------------------------===// 7538 // vop3 7539 //===----------------------------------------------------------------------===// 7540 7541 static bool ConvertOmodMul(int64_t &Mul) { 7542 if (Mul != 1 && Mul != 2 && Mul != 4) 7543 return false; 7544 7545 Mul >>= 1; 7546 return true; 7547 } 7548 7549 static bool ConvertOmodDiv(int64_t &Div) { 7550 if (Div == 1) { 7551 Div = 0; 7552 return true; 7553 } 7554 7555 if (Div == 2) { 7556 Div = 3; 7557 return true; 7558 } 7559 7560 return false; 7561 } 7562 7563 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7564 // This is intentional and ensures compatibility with sp3. 7565 // See bug 35397 for details. 7566 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7567 if (BoundCtrl == 0 || BoundCtrl == 1) { 7568 BoundCtrl = 1; 7569 return true; 7570 } 7571 return false; 7572 } 7573 7574 // Note: the order in this table matches the order of operands in AsmString. 7575 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7576 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7577 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7578 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7579 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7580 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7581 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7582 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7583 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7584 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7585 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7586 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7587 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7588 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7589 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7590 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7591 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7592 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7593 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7594 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7595 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7596 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7597 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7598 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7599 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7600 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7601 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7602 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7603 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7604 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7605 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7606 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7607 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7608 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7609 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7610 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7611 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7612 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7613 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7614 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7615 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7616 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7617 }; 7618 7619 void AMDGPUAsmParser::onBeginOfFile() { 7620 if (!getParser().getStreamer().getTargetStreamer() || 7621 getSTI().getTargetTriple().getArch() == Triple::r600) 7622 return; 7623 7624 if (!getTargetStreamer().getTargetID()) 7625 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7626 7627 if (isHsaAbiVersion3AndAbove(&getSTI())) 7628 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7629 } 7630 7631 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7632 7633 OperandMatchResultTy res = parseOptionalOpr(Operands); 7634 7635 // This is a hack to enable hardcoded mandatory operands which follow 7636 // optional operands. 7637 // 7638 // Current design assumes that all operands after the first optional operand 7639 // are also optional. However implementation of some instructions violates 7640 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7641 // 7642 // To alleviate this problem, we have to (implicitly) parse extra operands 7643 // to make sure autogenerated parser of custom operands never hit hardcoded 7644 // mandatory operands. 
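  // The loop below implements that lookahead: while the previous optional
  // operand parsed successfully and the statement has not ended, skip a comma
  // and try to parse another optional operand, up to MAX_OPR_LOOKAHEAD times
  // (see the flat/global atomic example mentioned above).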
7645 7646 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7647 if (res != MatchOperand_Success || 7648 isToken(AsmToken::EndOfStatement)) 7649 break; 7650 7651 trySkipToken(AsmToken::Comma); 7652 res = parseOptionalOpr(Operands); 7653 } 7654 7655 return res; 7656 } 7657 7658 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7659 OperandMatchResultTy res; 7660 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7661 // try to parse any optional operand here 7662 if (Op.IsBit) { 7663 res = parseNamedBit(Op.Name, Operands, Op.Type); 7664 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7665 res = parseOModOperand(Operands); 7666 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7667 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7668 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7669 res = parseSDWASel(Operands, Op.Name, Op.Type); 7670 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7671 res = parseSDWADstUnused(Operands); 7672 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7673 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7674 Op.Type == AMDGPUOperand::ImmTyNegLo || 7675 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7676 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7677 Op.ConvertResult); 7678 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7679 res = parseDim(Operands); 7680 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7681 res = parseCPol(Operands); 7682 } else { 7683 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7684 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7685 res = parseOperandArrayWithPrefix("neg", Operands, 7686 AMDGPUOperand::ImmTyBLGP, 7687 nullptr); 7688 } 7689 } 7690 if (res != MatchOperand_NoMatch) { 7691 return res; 7692 } 7693 } 7694 return MatchOperand_NoMatch; 7695 } 7696 7697 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7698 StringRef Name = getTokenStr(); 7699 if (Name == "mul") { 7700 return parseIntWithPrefix("mul", Operands, 7701 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7702 } 7703 7704 if (Name == "div") { 7705 return parseIntWithPrefix("div", Operands, 7706 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7707 } 7708 7709 return MatchOperand_NoMatch; 7710 } 7711 7712 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7713 cvtVOP3P(Inst, Operands); 7714 7715 int Opc = Inst.getOpcode(); 7716 7717 int SrcNum; 7718 const int Ops[] = { AMDGPU::OpName::src0, 7719 AMDGPU::OpName::src1, 7720 AMDGPU::OpName::src2 }; 7721 for (SrcNum = 0; 7722 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7723 ++SrcNum); 7724 assert(SrcNum > 0); 7725 7726 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7727 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7728 7729 if ((OpSel & (1 << SrcNum)) != 0) { 7730 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7731 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7732 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7733 } 7734 } 7735 7736 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7737 // 1. This operand is input modifiers 7738 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7739 // 2. This is not last operand 7740 && Desc.NumOperands > (OpNum + 1) 7741 // 3. Next operand is register class 7742 && Desc.OpInfo[OpNum + 1].RegClass != -1 7743 // 4. 
Next register is not tied to any other operand 7744 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7745 } 7746 7747 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7748 { 7749 OptionalImmIndexMap OptionalIdx; 7750 unsigned Opc = Inst.getOpcode(); 7751 7752 unsigned I = 1; 7753 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7754 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7755 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7756 } 7757 7758 for (unsigned E = Operands.size(); I != E; ++I) { 7759 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7760 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7761 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7762 } else if (Op.isInterpSlot() || 7763 Op.isInterpAttr() || 7764 Op.isAttrChan()) { 7765 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7766 } else if (Op.isImmModifier()) { 7767 OptionalIdx[Op.getImmTy()] = I; 7768 } else { 7769 llvm_unreachable("unhandled operand type"); 7770 } 7771 } 7772 7773 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7774 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7775 } 7776 7777 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7778 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7779 } 7780 7781 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7783 } 7784 } 7785 7786 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7787 OptionalImmIndexMap &OptionalIdx) { 7788 unsigned Opc = Inst.getOpcode(); 7789 7790 unsigned I = 1; 7791 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7792 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7793 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7794 } 7795 7796 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7797 // This instruction has src modifiers 7798 for (unsigned E = Operands.size(); I != E; ++I) { 7799 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7800 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7801 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7802 } else if (Op.isImmModifier()) { 7803 OptionalIdx[Op.getImmTy()] = I; 7804 } else if (Op.isRegOrImm()) { 7805 Op.addRegOrImmOperands(Inst, 1); 7806 } else { 7807 llvm_unreachable("unhandled operand type"); 7808 } 7809 } 7810 } else { 7811 // No src modifiers 7812 for (unsigned E = Operands.size(); I != E; ++I) { 7813 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7814 if (Op.isMod()) { 7815 OptionalIdx[Op.getImmTy()] = I; 7816 } else { 7817 Op.addRegOrImmOperands(Inst, 1); 7818 } 7819 } 7820 } 7821 7822 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7824 } 7825 7826 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7827 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7828 } 7829 7830 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7831 // it has src2 register operand that is tied to dst operand 7832 // we don't allow modifiers for this operand in assembler so src2_modifiers 7833 // should be 0. 
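  // The code below inserts a zero src2_modifiers immediate at the expected
  // operand position and then re-adds a copy of the dst register as src2.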
7834 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7835 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7836 Opc == AMDGPU::V_MAC_F32_e64_vi || 7837 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7838 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7839 Opc == AMDGPU::V_MAC_F16_e64_vi || 7840 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7841 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7842 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7843 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7844 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7845 auto it = Inst.begin(); 7846 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7847 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7848 ++it; 7849 // Copy the operand to ensure it's not invalidated when Inst grows. 7850 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7851 } 7852 } 7853 7854 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7855 OptionalImmIndexMap OptionalIdx; 7856 cvtVOP3(Inst, Operands, OptionalIdx); 7857 } 7858 7859 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7860 OptionalImmIndexMap &OptIdx) { 7861 const int Opc = Inst.getOpcode(); 7862 const MCInstrDesc &Desc = MII.get(Opc); 7863 7864 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7865 7866 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7867 assert(!IsPacked); 7868 Inst.addOperand(Inst.getOperand(0)); 7869 } 7870 7871 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7872 // instruction, and then figure out where to actually put the modifiers 7873 7874 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7875 if (OpSelIdx != -1) { 7876 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7877 } 7878 7879 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7880 if (OpSelHiIdx != -1) { 7881 int DefaultVal = IsPacked ? 
-1 : 0; 7882 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7883 DefaultVal); 7884 } 7885 7886 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7887 if (NegLoIdx != -1) { 7888 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7889 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7890 } 7891 7892 const int Ops[] = { AMDGPU::OpName::src0, 7893 AMDGPU::OpName::src1, 7894 AMDGPU::OpName::src2 }; 7895 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7896 AMDGPU::OpName::src1_modifiers, 7897 AMDGPU::OpName::src2_modifiers }; 7898 7899 unsigned OpSel = 0; 7900 unsigned OpSelHi = 0; 7901 unsigned NegLo = 0; 7902 unsigned NegHi = 0; 7903 7904 if (OpSelIdx != -1) 7905 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7906 7907 if (OpSelHiIdx != -1) 7908 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7909 7910 if (NegLoIdx != -1) { 7911 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7912 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7913 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7914 } 7915 7916 for (int J = 0; J < 3; ++J) { 7917 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7918 if (OpIdx == -1) 7919 break; 7920 7921 uint32_t ModVal = 0; 7922 7923 if ((OpSel & (1 << J)) != 0) 7924 ModVal |= SISrcMods::OP_SEL_0; 7925 7926 if ((OpSelHi & (1 << J)) != 0) 7927 ModVal |= SISrcMods::OP_SEL_1; 7928 7929 if ((NegLo & (1 << J)) != 0) 7930 ModVal |= SISrcMods::NEG; 7931 7932 if ((NegHi & (1 << J)) != 0) 7933 ModVal |= SISrcMods::NEG_HI; 7934 7935 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7936 7937 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7938 } 7939 } 7940 7941 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7942 OptionalImmIndexMap OptIdx; 7943 cvtVOP3(Inst, Operands, OptIdx); 7944 cvtVOP3P(Inst, Operands, OptIdx); 7945 } 7946 7947 //===----------------------------------------------------------------------===// 7948 // dpp 7949 //===----------------------------------------------------------------------===// 7950 7951 bool AMDGPUOperand::isDPP8() const { 7952 return isImmTy(ImmTyDPP8); 7953 } 7954 7955 bool AMDGPUOperand::isDPPCtrl() const { 7956 using namespace AMDGPU::DPP; 7957 7958 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7959 if (result) { 7960 int64_t Imm = getImm(); 7961 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7962 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7963 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7964 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7965 (Imm == DppCtrl::WAVE_SHL1) || 7966 (Imm == DppCtrl::WAVE_ROL1) || 7967 (Imm == DppCtrl::WAVE_SHR1) || 7968 (Imm == DppCtrl::WAVE_ROR1) || 7969 (Imm == DppCtrl::ROW_MIRROR) || 7970 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7971 (Imm == DppCtrl::BCAST15) || 7972 (Imm == DppCtrl::BCAST31) || 7973 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7974 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7975 } 7976 return false; 7977 } 7978 7979 //===----------------------------------------------------------------------===// 7980 // mAI 7981 //===----------------------------------------------------------------------===// 7982 7983 bool AMDGPUOperand::isBLGP() const { 7984 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7985 } 7986 7987 bool 
AMDGPUOperand::isCBSZ() const { 7988 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7989 } 7990 7991 bool AMDGPUOperand::isABID() const { 7992 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7993 } 7994 7995 bool AMDGPUOperand::isS16Imm() const { 7996 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7997 } 7998 7999 bool AMDGPUOperand::isU16Imm() const { 8000 return isImm() && isUInt<16>(getImm()); 8001 } 8002 8003 //===----------------------------------------------------------------------===// 8004 // dim 8005 //===----------------------------------------------------------------------===// 8006 8007 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8008 // We want to allow "dim:1D" etc., 8009 // but the initial 1 is tokenized as an integer. 8010 std::string Token; 8011 if (isToken(AsmToken::Integer)) { 8012 SMLoc Loc = getToken().getEndLoc(); 8013 Token = std::string(getTokenStr()); 8014 lex(); 8015 if (getLoc() != Loc) 8016 return false; 8017 } 8018 8019 StringRef Suffix; 8020 if (!parseId(Suffix)) 8021 return false; 8022 Token += Suffix; 8023 8024 StringRef DimId = Token; 8025 if (DimId.startswith("SQ_RSRC_IMG_")) 8026 DimId = DimId.drop_front(12); 8027 8028 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8029 if (!DimInfo) 8030 return false; 8031 8032 Encoding = DimInfo->Encoding; 8033 return true; 8034 } 8035 8036 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8037 if (!isGFX10Plus()) 8038 return MatchOperand_NoMatch; 8039 8040 SMLoc S = getLoc(); 8041 8042 if (!trySkipId("dim", AsmToken::Colon)) 8043 return MatchOperand_NoMatch; 8044 8045 unsigned Encoding; 8046 SMLoc Loc = getLoc(); 8047 if (!parseDimId(Encoding)) { 8048 Error(Loc, "invalid dim value"); 8049 return MatchOperand_ParseFail; 8050 } 8051 8052 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8053 AMDGPUOperand::ImmTyDim)); 8054 return MatchOperand_Success; 8055 } 8056 8057 //===----------------------------------------------------------------------===// 8058 // dpp 8059 //===----------------------------------------------------------------------===// 8060 8061 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8062 SMLoc S = getLoc(); 8063 8064 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8065 return MatchOperand_NoMatch; 8066 8067 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8068 8069 int64_t Sels[8]; 8070 8071 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8072 return MatchOperand_ParseFail; 8073 8074 for (size_t i = 0; i < 8; ++i) { 8075 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8076 return MatchOperand_ParseFail; 8077 8078 SMLoc Loc = getLoc(); 8079 if (getParser().parseAbsoluteExpression(Sels[i])) 8080 return MatchOperand_ParseFail; 8081 if (0 > Sels[i] || 7 < Sels[i]) { 8082 Error(Loc, "expected a 3-bit value"); 8083 return MatchOperand_ParseFail; 8084 } 8085 } 8086 8087 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8088 return MatchOperand_ParseFail; 8089 8090 unsigned DPP8 = 0; 8091 for (size_t i = 0; i < 8; ++i) 8092 DPP8 |= (Sels[i] << (i * 3)); 8093 8094 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8095 return MatchOperand_Success; 8096 } 8097 8098 bool 8099 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8100 const OperandVector &Operands) { 8101 if (Ctrl == "row_newbcast") 8102 return isGFX90A(); 8103 8104 if (Ctrl == "row_share" || 8105 Ctrl 
== "row_xmask") 8106 return isGFX10Plus(); 8107 8108 if (Ctrl == "wave_shl" || 8109 Ctrl == "wave_shr" || 8110 Ctrl == "wave_rol" || 8111 Ctrl == "wave_ror" || 8112 Ctrl == "row_bcast") 8113 return isVI() || isGFX9(); 8114 8115 return Ctrl == "row_mirror" || 8116 Ctrl == "row_half_mirror" || 8117 Ctrl == "quad_perm" || 8118 Ctrl == "row_shl" || 8119 Ctrl == "row_shr" || 8120 Ctrl == "row_ror"; 8121 } 8122 8123 int64_t 8124 AMDGPUAsmParser::parseDPPCtrlPerm() { 8125 // quad_perm:[%d,%d,%d,%d] 8126 8127 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8128 return -1; 8129 8130 int64_t Val = 0; 8131 for (int i = 0; i < 4; ++i) { 8132 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8133 return -1; 8134 8135 int64_t Temp; 8136 SMLoc Loc = getLoc(); 8137 if (getParser().parseAbsoluteExpression(Temp)) 8138 return -1; 8139 if (Temp < 0 || Temp > 3) { 8140 Error(Loc, "expected a 2-bit value"); 8141 return -1; 8142 } 8143 8144 Val += (Temp << i * 2); 8145 } 8146 8147 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8148 return -1; 8149 8150 return Val; 8151 } 8152 8153 int64_t 8154 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8155 using namespace AMDGPU::DPP; 8156 8157 // sel:%d 8158 8159 int64_t Val; 8160 SMLoc Loc = getLoc(); 8161 8162 if (getParser().parseAbsoluteExpression(Val)) 8163 return -1; 8164 8165 struct DppCtrlCheck { 8166 int64_t Ctrl; 8167 int Lo; 8168 int Hi; 8169 }; 8170 8171 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8172 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8173 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8174 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8175 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8176 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8177 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8178 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8179 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8180 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8181 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8182 .Default({-1, 0, 0}); 8183 8184 bool Valid; 8185 if (Check.Ctrl == -1) { 8186 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8187 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8188 } else { 8189 Valid = Check.Lo <= Val && Val <= Check.Hi; 8190 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8191 } 8192 8193 if (!Valid) { 8194 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8195 return -1; 8196 } 8197 8198 return Val; 8199 } 8200 8201 OperandMatchResultTy 8202 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8203 using namespace AMDGPU::DPP; 8204 8205 if (!isToken(AsmToken::Identifier) || 8206 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8207 return MatchOperand_NoMatch; 8208 8209 SMLoc S = getLoc(); 8210 int64_t Val = -1; 8211 StringRef Ctrl; 8212 8213 parseId(Ctrl); 8214 8215 if (Ctrl == "row_mirror") { 8216 Val = DppCtrl::ROW_MIRROR; 8217 } else if (Ctrl == "row_half_mirror") { 8218 Val = DppCtrl::ROW_HALF_MIRROR; 8219 } else { 8220 if (skipToken(AsmToken::Colon, "expected a colon")) { 8221 if (Ctrl == "quad_perm") { 8222 Val = parseDPPCtrlPerm(); 8223 } else { 8224 Val = parseDPPCtrlSel(Ctrl); 8225 } 8226 } 8227 } 8228 8229 if (Val == -1) 8230 return MatchOperand_ParseFail; 8231 8232 Operands.push_back( 8233 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8234 return MatchOperand_Success; 8235 } 8236 8237 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8238 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8239 } 8240 8241 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8242 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8243 } 8244 8245 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8246 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8247 } 8248 8249 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8250 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8251 } 8252 8253 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8254 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8255 } 8256 8257 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8258 OptionalImmIndexMap OptionalIdx; 8259 8260 unsigned Opc = Inst.getOpcode(); 8261 bool HasModifiers = 8262 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8263 unsigned I = 1; 8264 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8265 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8266 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8267 } 8268 8269 int Fi = 0; 8270 for (unsigned E = Operands.size(); I != E; ++I) { 8271 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8272 MCOI::TIED_TO); 8273 if (TiedTo != -1) { 8274 assert((unsigned)TiedTo < Inst.getNumOperands()); 8275 // handle tied old or src2 for MAC instructions 8276 Inst.addOperand(Inst.getOperand(TiedTo)); 8277 } 8278 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8279 // Add the register arguments 8280 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8281 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8282 // Skip it. 
8283 continue; 8284 } 8285 8286 if (IsDPP8) { 8287 if (Op.isDPP8()) { 8288 Op.addImmOperands(Inst, 1); 8289 } else if (HasModifiers && 8290 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8291 Op.addRegWithFPInputModsOperands(Inst, 2); 8292 } else if (Op.isFI()) { 8293 Fi = Op.getImm(); 8294 } else if (Op.isReg()) { 8295 Op.addRegOperands(Inst, 1); 8296 } else { 8297 llvm_unreachable("Invalid operand type"); 8298 } 8299 } else { 8300 if (HasModifiers && 8301 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8302 Op.addRegWithFPInputModsOperands(Inst, 2); 8303 } else if (Op.isReg()) { 8304 Op.addRegOperands(Inst, 1); 8305 } else if (Op.isDPPCtrl()) { 8306 Op.addImmOperands(Inst, 1); 8307 } else if (Op.isImm()) { 8308 // Handle optional arguments 8309 OptionalIdx[Op.getImmTy()] = I; 8310 } else { 8311 llvm_unreachable("Invalid operand type"); 8312 } 8313 } 8314 } 8315 8316 if (IsDPP8) { 8317 using namespace llvm::AMDGPU::DPP; 8318 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8319 } else { 8320 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8322 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8323 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8324 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8325 } 8326 } 8327 } 8328 8329 //===----------------------------------------------------------------------===// 8330 // sdwa 8331 //===----------------------------------------------------------------------===// 8332 8333 OperandMatchResultTy 8334 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8335 AMDGPUOperand::ImmTy Type) { 8336 using namespace llvm::AMDGPU::SDWA; 8337 8338 SMLoc S = getLoc(); 8339 StringRef Value; 8340 OperandMatchResultTy res; 8341 8342 SMLoc StringLoc; 8343 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8344 if (res != MatchOperand_Success) { 8345 return res; 8346 } 8347 8348 int64_t Int; 8349 Int = StringSwitch<int64_t>(Value) 8350 .Case("BYTE_0", SdwaSel::BYTE_0) 8351 .Case("BYTE_1", SdwaSel::BYTE_1) 8352 .Case("BYTE_2", SdwaSel::BYTE_2) 8353 .Case("BYTE_3", SdwaSel::BYTE_3) 8354 .Case("WORD_0", SdwaSel::WORD_0) 8355 .Case("WORD_1", SdwaSel::WORD_1) 8356 .Case("DWORD", SdwaSel::DWORD) 8357 .Default(0xffffffff); 8358 8359 if (Int == 0xffffffff) { 8360 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8361 return MatchOperand_ParseFail; 8362 } 8363 8364 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8365 return MatchOperand_Success; 8366 } 8367 8368 OperandMatchResultTy 8369 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8370 using namespace llvm::AMDGPU::SDWA; 8371 8372 SMLoc S = getLoc(); 8373 StringRef Value; 8374 OperandMatchResultTy res; 8375 8376 SMLoc StringLoc; 8377 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8378 if (res != MatchOperand_Success) { 8379 return res; 8380 } 8381 8382 int64_t Int; 8383 Int = StringSwitch<int64_t>(Value) 8384 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8385 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8386 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8387 .Default(0xffffffff); 8388 8389 if (Int == 0xffffffff) { 8390 Error(StringLoc, "invalid dst_unused value"); 8391 return MatchOperand_ParseFail; 8392 } 8393 8394 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8395 return MatchOperand_Success; 8396 } 8397 8398 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8399 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8400 } 8401 8402 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8403 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8404 } 8405 8406 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8407 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8408 } 8409 8410 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8411 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8412 } 8413 8414 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8415 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8416 } 8417 8418 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8419 uint64_t BasicInstType, 8420 bool SkipDstVcc, 8421 bool SkipSrcVcc) { 8422 using namespace llvm::AMDGPU::SDWA; 8423 8424 OptionalImmIndexMap OptionalIdx; 8425 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8426 bool SkippedVcc = false; 8427 8428 unsigned I = 1; 8429 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8430 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8431 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8432 } 8433 8434 for (unsigned E = Operands.size(); I != E; ++I) { 8435 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8436 if (SkipVcc && !SkippedVcc && Op.isReg() && 8437 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8438 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8439 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8440 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8441 // Skip VCC only if we didn't skip it on previous iteration. 8442 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
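        // Hence the operand counts tested below: 1 means only the dst has been
        // converted so far (dst vcc position); 5 means dst plus src0/src1 with
        // their modifier operands (src carry-in vcc position).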
8443 if (BasicInstType == SIInstrFlags::VOP2 && 8444 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8445 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8446 SkippedVcc = true; 8447 continue; 8448 } else if (BasicInstType == SIInstrFlags::VOPC && 8449 Inst.getNumOperands() == 0) { 8450 SkippedVcc = true; 8451 continue; 8452 } 8453 } 8454 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8455 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8456 } else if (Op.isImm()) { 8457 // Handle optional arguments 8458 OptionalIdx[Op.getImmTy()] = I; 8459 } else { 8460 llvm_unreachable("Invalid operand type"); 8461 } 8462 SkippedVcc = false; 8463 } 8464 8465 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8466 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8467 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8468 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments 8469 switch (BasicInstType) { 8470 case SIInstrFlags::VOP1: 8471 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8472 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8473 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8474 } 8475 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8476 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8477 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8478 break; 8479 8480 case SIInstrFlags::VOP2: 8481 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8482 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8483 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8484 } 8485 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8486 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8487 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8488 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8489 break; 8490 8491 case SIInstrFlags::VOPC: 8492 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8494 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8495 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8496 break; 8497 8498 default: 8499 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 8500 } 8501 } 8502 8503 // Special case v_mac_{f16, f32}: 8504 // it has a src2 register operand that is tied to the dst operand 8505 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8506 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8507 auto it = Inst.begin(); 8508 std::advance( 8509 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8510 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8511 } 8512 } 8513 8514 //===----------------------------------------------------------------------===// 8515 // mAI 8516 //===----------------------------------------------------------------------===// 8517 8518 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8519 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8520 } 8521 8522 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8523 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8524 } 8525 8526 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8527 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8528 } 8529 8530 /// Force static initialization. 8531 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8532 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8533 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8534 } 8535 8536 #define GET_REGISTER_MATCHER 8537 #define GET_MATCHER_IMPLEMENTATION 8538 #define GET_MNEMONIC_SPELL_CHECKER 8539 #define GET_MNEMONIC_CHECKER 8540 #include "AMDGPUGenAsmMatcher.inc" 8541 8542 // This function should be defined after the auto-generated include so that we 8543 // have the MatchClassKind enum defined. 8544 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8545 unsigned Kind) { 8546 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8547 // But MatchInstructionImpl() expects to meet a token and fails to validate the 8548 // operand. This method checks if we are given an immediate operand but expect 8549 // to get the corresponding token. 8550 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8551 switch (Kind) { 8552 case MCK_addr64: 8553 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8554 case MCK_gds: 8555 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8556 case MCK_lds: 8557 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8558 case MCK_idxen: 8559 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8560 case MCK_offen: 8561 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8562 case MCK_SSrcB32: 8563 // When operands have expression values, they will return true for isToken, 8564 // because it is not possible to distinguish between a token and an 8565 // expression at parse time. MatchInstructionImpl() will always try to 8566 // match an operand as a token when isToken returns true, and when the 8567 // name of the expression is not a valid token, the match will fail, 8568 // so we need to handle it here. 8569 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8570 case MCK_SSrcF32: 8571 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8572 case MCK_SoppBrTarget: 8573 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8574 case MCK_VReg32OrOff: 8575 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8576 case MCK_InterpSlot: 8577 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8578 case MCK_Attr: 8579 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8580 case MCK_AttrChan: 8581 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8582 case MCK_ImmSMEMOffset: 8583 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8584 case MCK_SReg_64: 8585 case MCK_SReg_64_XEXEC: 8586 // Null is defined as a 32-bit register but 8587 // it should also be enabled with 64-bit operands. 8588 // The following code enables it for SReg_64 operands 8589 // used as source and destination. Remaining source 8590 // operands are handled in isInlinableImm. 8591 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8592 default: 8593 return Match_InvalidOperand; 8594 } 8595 } 8596 8597 //===----------------------------------------------------------------------===// 8598 // endpgm 8599 //===----------------------------------------------------------------------===// 8600 8601 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8602 SMLoc S = getLoc(); 8603 int64_t Imm = 0; 8604 8605 if (!parseExpr(Imm)) { 8606 // The operand is optional, if not present default to 0 8607 Imm = 0; 8608 } 8609 8610 if (!isUInt<16>(Imm)) { 8611 Error(S, "expected a 16-bit value"); 8612 return MatchOperand_ParseFail; 8613 } 8614 8615 Operands.push_back( 8616 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8617 return MatchOperand_Success; 8618 } 8619 8620 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8621