//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };
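  // ImmTy records which named/optional operand an immediate was parsed as
  // (e.g. "offset:16" yields an immediate of type ImmTyOffset, "gds" one of
  // type ImmTyGDS). Plain literal operands use ImmTyNone.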
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrInline(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }
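  // Predicates for named/optional operands. Each simply tests the ImmTy that
  // was recorded when the operand was parsed.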
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return isRegOrInline(RCID, type) && !hasModifiers();
  }
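  // Source-operand predicate naming convention used below:
  //   SCSrc* - SGPR or inline constant,       SSrc* - SCSrc or a literal constant,
  //   VCSrc* - VGPR/SGPR or inline constant,  VSrc* - VCSrc or a literal constant.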
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
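  // VISrc* predicates accept a VGPR of the given width or an inline constant.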
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
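  // AISrc* predicates accept an AGPR (accumulator register) of the given
  // width or an inline constant; these feed the MAI/MFMA instructions.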
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
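  // KImm operands are literal constants encoded directly in the instruction
  // word (e.g. the trailing constant of v_madmk_f32/v_madak_f32).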
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isDepCtr() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  int AgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;
  MCSubtargetInfo const *MSTI = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        Sym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

  void usesAgprAt(int i) {
    // Instruction will error in AMDGPUAsmParser::MatchAndEmitInstruction
    if (!hasMAIInsts(*MSTI))
      return;

    if (i >= AgprIndexUnusedMin) {
      AgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol* const Sym =
          Ctx->getOrCreateSymbol(Twine(".kernel.agpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(AgprIndexUnusedMin, *Ctx));

        // Also update vgpr_count (dependent on agpr_count for gfx908/gfx90a)
        MCSymbol* const vSym =
          Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        int totalVGPR = getTotalNumVGPRs(isGFX90A(*MSTI), AgprIndexUnusedMin,
                                         VgprIndexUnusedMin);
        vSym->setVariableValue(MCConstantExpr::create(totalVGPR, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    MSTI = Ctx->getSubtargetInfo();

    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
    if (hasMAIInsts(*MSTI)) {
      usesAgprAt(AgprIndexUnusedMin = -1);
    }
  }
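  // Record a use of the register(s) starting at DwordRegIndex and spanning
  // RegWidth bits, so that the .kernel.{s,v,a}gpr_count symbols track the
  // highest register index referenced in the kernel.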
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex,
                    unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR:
      usesSgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_AGPR:
      usesAgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    case IS_VGPR:
      usesVgprAt(DwordRegIndex + divideCeil(RegWidth, 32) - 1);
      break;
    default:
      break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;
  unsigned CPolSeen;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3AndAbove(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  // TODO: isGFX90A is also true for GFX940. We need to clean this up.
  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX940() const {
    return AMDGPU::isGFX940(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not
    // const in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                                    bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                                     bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc,
                                                int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc,
                                                  int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);

  bool parseDepCtr(int64_t &IntVal, unsigned &Mask);
  void depCtrError(SMLoc Loc, int ErrorId, StringRef DepCtrName);
  OperandMatchResultTy parseDepCtrOps(OperandVector &Operands);

  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
  SMLoc getBLGPLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;
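  // The validate* helpers check target- and encoding-specific restrictions on
  // an already-matched MCInst and report diagnostics at the operand locations
  // provided by the get*Loc helpers above.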
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  Optional<StringRef> validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateMFMA(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateBLGP(const MCInst &Inst, const OperandVector &Operands);
  bool validateGWS(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  Optional<StringRef> validateLdsDirect(const MCInst &Inst);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  void onBeginOfFile() override;

  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();
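  // Custom converters that build the MCInst for instruction formats whose
  // operand order or optional-operand handling differs from the parsed
  // operand list (MUBUF, MTBUF, MIMG, VOP3, DPP, SDWA, ...).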
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultCPol() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                OptionalImmIndexMap &OptionalIdx);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
OperandVector &Operands, 1737 uint64_t BasicInstType, 1738 bool SkipDstVcc = false, 1739 bool SkipSrcVcc = false); 1740 1741 AMDGPUOperand::Ptr defaultBLGP() const; 1742 AMDGPUOperand::Ptr defaultCBSZ() const; 1743 AMDGPUOperand::Ptr defaultABID() const; 1744 1745 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1746 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1747 }; 1748 1749 struct OptionalOperand { 1750 const char *Name; 1751 AMDGPUOperand::ImmTy Type; 1752 bool IsBit; 1753 bool (*ConvertResult)(int64_t&); 1754 }; 1755 1756 } // end anonymous namespace 1757 1758 // May be called with integer type with equivalent bitwidth. 1759 static const fltSemantics *getFltSemantics(unsigned Size) { 1760 switch (Size) { 1761 case 4: 1762 return &APFloat::IEEEsingle(); 1763 case 8: 1764 return &APFloat::IEEEdouble(); 1765 case 2: 1766 return &APFloat::IEEEhalf(); 1767 default: 1768 llvm_unreachable("unsupported fp type"); 1769 } 1770 } 1771 1772 static const fltSemantics *getFltSemantics(MVT VT) { 1773 return getFltSemantics(VT.getSizeInBits() / 8); 1774 } 1775 1776 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1777 switch (OperandType) { 1778 case AMDGPU::OPERAND_REG_IMM_INT32: 1779 case AMDGPU::OPERAND_REG_IMM_FP32: 1780 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1781 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1782 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1783 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1784 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1785 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1786 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1787 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1788 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1789 case AMDGPU::OPERAND_KIMM32: 1790 return &APFloat::IEEEsingle(); 1791 case AMDGPU::OPERAND_REG_IMM_INT64: 1792 case AMDGPU::OPERAND_REG_IMM_FP64: 1793 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1794 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1795 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1796 return &APFloat::IEEEdouble(); 1797 case AMDGPU::OPERAND_REG_IMM_INT16: 1798 case AMDGPU::OPERAND_REG_IMM_FP16: 1799 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1800 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1808 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1809 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1810 case AMDGPU::OPERAND_KIMM16: 1811 return &APFloat::IEEEhalf(); 1812 default: 1813 llvm_unreachable("unsupported fp type"); 1814 } 1815 } 1816 1817 //===----------------------------------------------------------------------===// 1818 // Operand 1819 //===----------------------------------------------------------------------===// 1820 1821 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1822 bool Lost; 1823 1824 // Convert literal to single precision 1825 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1826 APFloat::rmNearestTiesToEven, 1827 &Lost); 1828 // We allow precision lost but not overflow or underflow 1829 if (Status != APFloat::opOK && 1830 Lost && 1831 ((Status & APFloat::opOverflow) != 0 || 1832 (Status & APFloat::opUnderflow) != 0)) { 1833 return false; 1834 } 1835 1836 return true; 1837 } 1838 1839 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1840 return isUIntN(Size, 
Val) || isIntN(Size, Val); 1841 } 1842 1843 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1844 if (VT.getScalarType() == MVT::i16) { 1845 // FP immediate values are broken. 1846 return isInlinableIntLiteral(Val); 1847 } 1848 1849 // f16/v2f16 operands work correctly for all values. 1850 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1851 } 1852 1853 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1854 1855 // This is a hack to enable named inline values like 1856 // shared_base with both 32-bit and 64-bit operands. 1857 // Note that these values are defined as 1858 // 32-bit operands only. 1859 if (isInlineValue()) { 1860 return true; 1861 } 1862 1863 if (!isImmTy(ImmTyNone)) { 1864 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1865 return false; 1866 } 1867 // TODO: We should avoid using host float here. It would be better to 1868 // check the float bit values which is what a few other places do. 1869 // We've had bot failures before due to weird NaN support on mips hosts. 1870 1871 APInt Literal(64, Imm.Val); 1872 1873 if (Imm.IsFPImm) { // We got fp literal token 1874 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1875 return AMDGPU::isInlinableLiteral64(Imm.Val, 1876 AsmParser->hasInv2PiInlineImm()); 1877 } 1878 1879 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1880 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1881 return false; 1882 1883 if (type.getScalarSizeInBits() == 16) { 1884 return isInlineableLiteralOp16( 1885 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1886 type, AsmParser->hasInv2PiInlineImm()); 1887 } 1888 1889 // Check if single precision literal is inlinable 1890 return AMDGPU::isInlinableLiteral32( 1891 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1892 AsmParser->hasInv2PiInlineImm()); 1893 } 1894 1895 // We got int literal token. 1896 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1897 return AMDGPU::isInlinableLiteral64(Imm.Val, 1898 AsmParser->hasInv2PiInlineImm()); 1899 } 1900 1901 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1902 return false; 1903 } 1904 1905 if (type.getScalarSizeInBits() == 16) { 1906 return isInlineableLiteralOp16( 1907 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1908 type, AsmParser->hasInv2PiInlineImm()); 1909 } 1910 1911 return AMDGPU::isInlinableLiteral32( 1912 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1913 AsmParser->hasInv2PiInlineImm()); 1914 } 1915 1916 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1917 // Check that this immediate can be added as literal 1918 if (!isImmTy(ImmTyNone)) { 1919 return false; 1920 } 1921 1922 if (!Imm.IsFPImm) { 1923 // We got int literal token. 1924 1925 if (type == MVT::f64 && hasFPModifiers()) { 1926 // Cannot apply fp modifiers to int literals preserving the same semantics 1927 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1928 // disable these cases. 1929 return false; 1930 } 1931 1932 unsigned Size = type.getSizeInBits(); 1933 if (Size == 64) 1934 Size = 32; 1935 1936 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1937 // types. 
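  // For example, with Size == 32 both 0xffffffff (unsigned) and -1 (signed)
  // are accepted below, while 0x100000000 fits neither form and is rejected.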
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // Only the high 32 bits of such a literal can be encoded; the low 32 bits
    // will be set to zero. We accept these literals anyway.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

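  // When requested below, abs/neg modifiers on an FP source are folded
  // directly into the literal bits via applyInputFPModifiers(): abs() clears
  // the floating-point sign bit and neg() flips it, so e.g. neg() applied to
  // the literal 1.0 yields the bit pattern of -1.0.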
if (ApplyModifiers) { 2043 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 2044 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 2045 Val = applyInputFPModifiers(Val, Size); 2046 } 2047 2048 APInt Literal(64, Val); 2049 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 2050 2051 if (Imm.IsFPImm) { // We got fp literal token 2052 switch (OpTy) { 2053 case AMDGPU::OPERAND_REG_IMM_INT64: 2054 case AMDGPU::OPERAND_REG_IMM_FP64: 2055 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2056 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2057 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2058 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2059 AsmParser->hasInv2PiInlineImm())) { 2060 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2061 setImmKindConst(); 2062 return; 2063 } 2064 2065 // Non-inlineable 2066 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2067 // For fp operands we check if low 32 bits are zeros 2068 if (Literal.getLoBits(32) != 0) { 2069 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2070 "Can't encode literal as exact 64-bit floating-point operand. " 2071 "Low 32-bits will be set to zero"); 2072 } 2073 2074 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2075 setImmKindLiteral(); 2076 return; 2077 } 2078 2079 // We don't allow fp literals in 64-bit integer instructions. It is 2080 // unclear how we should encode them. This case should be checked earlier 2081 // in predicate methods (isLiteralImm()) 2082 llvm_unreachable("fp literal in 64-bit integer instruction."); 2083 2084 case AMDGPU::OPERAND_REG_IMM_INT32: 2085 case AMDGPU::OPERAND_REG_IMM_FP32: 2086 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2087 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2088 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2089 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2090 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2091 case AMDGPU::OPERAND_REG_IMM_INT16: 2092 case AMDGPU::OPERAND_REG_IMM_FP16: 2093 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2094 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2095 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2096 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2097 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2102 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2103 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2104 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2105 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2106 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2107 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2108 case AMDGPU::OPERAND_KIMM32: 2109 case AMDGPU::OPERAND_KIMM16: { 2110 bool lost; 2111 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2112 // Convert literal to single precision 2113 FPLiteral.convert(*getOpFltSemantics(OpTy), 2114 APFloat::rmNearestTiesToEven, &lost); 2115 // We allow precision lost but not overflow or underflow. This should be 2116 // checked earlier in isLiteralImm() 2117 2118 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2119 Inst.addOperand(MCOperand::createImm(ImmVal)); 2120 setImmKindLiteral(); 2121 return; 2122 } 2123 default: 2124 llvm_unreachable("invalid operand size"); 2125 } 2126 2127 return; 2128 } 2129 2130 // We got int literal token. 2131 // Only sign extend inline immediates. 
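  // For example, with a 32-bit operand an integer token such as -8 is an
  // inline constant and is emitted as-is below, whereas 0x12345 is not
  // inlinable and is emitted as a 32-bit literal (Val & 0xffffffff).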
2132 switch (OpTy) { 2133 case AMDGPU::OPERAND_REG_IMM_INT32: 2134 case AMDGPU::OPERAND_REG_IMM_FP32: 2135 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2136 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2137 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2138 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2139 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2140 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2141 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2142 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2143 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2144 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2145 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2146 if (isSafeTruncation(Val, 32) && 2147 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2148 AsmParser->hasInv2PiInlineImm())) { 2149 Inst.addOperand(MCOperand::createImm(Val)); 2150 setImmKindConst(); 2151 return; 2152 } 2153 2154 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2155 setImmKindLiteral(); 2156 return; 2157 2158 case AMDGPU::OPERAND_REG_IMM_INT64: 2159 case AMDGPU::OPERAND_REG_IMM_FP64: 2160 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2161 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2162 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2163 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2164 Inst.addOperand(MCOperand::createImm(Val)); 2165 setImmKindConst(); 2166 return; 2167 } 2168 2169 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2170 setImmKindLiteral(); 2171 return; 2172 2173 case AMDGPU::OPERAND_REG_IMM_INT16: 2174 case AMDGPU::OPERAND_REG_IMM_FP16: 2175 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2176 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2177 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2178 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2179 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2180 if (isSafeTruncation(Val, 16) && 2181 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2182 AsmParser->hasInv2PiInlineImm())) { 2183 Inst.addOperand(MCOperand::createImm(Val)); 2184 setImmKindConst(); 2185 return; 2186 } 2187 2188 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2189 setImmKindLiteral(); 2190 return; 2191 2192 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2193 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2194 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2195 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2196 assert(isSafeTruncation(Val, 16)); 2197 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2198 AsmParser->hasInv2PiInlineImm())); 2199 2200 Inst.addOperand(MCOperand::createImm(Val)); 2201 return; 2202 } 2203 case AMDGPU::OPERAND_KIMM32: 2204 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2205 setImmKindNone(); 2206 return; 2207 case AMDGPU::OPERAND_KIMM16: 2208 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2209 setImmKindNone(); 2210 return; 2211 default: 2212 llvm_unreachable("invalid operand size"); 2213 } 2214 } 2215 2216 template <unsigned Bitwidth> 2217 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2218 APInt Literal(64, Imm.Val); 2219 setImmKindNone(); 2220 2221 if (!Imm.IsFPImm) { 2222 // We got int literal token. 
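    // An integer token is simply truncated to the low Bitwidth bits; only fp
    // tokens (handled below) are converted to the operand's fp format first.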
2223 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2224 return; 2225 } 2226 2227 bool Lost; 2228 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2229 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2230 APFloat::rmNearestTiesToEven, &Lost); 2231 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2232 } 2233 2234 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2235 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2236 } 2237 2238 static bool isInlineValue(unsigned Reg) { 2239 switch (Reg) { 2240 case AMDGPU::SRC_SHARED_BASE: 2241 case AMDGPU::SRC_SHARED_LIMIT: 2242 case AMDGPU::SRC_PRIVATE_BASE: 2243 case AMDGPU::SRC_PRIVATE_LIMIT: 2244 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2245 return true; 2246 case AMDGPU::SRC_VCCZ: 2247 case AMDGPU::SRC_EXECZ: 2248 case AMDGPU::SRC_SCC: 2249 return true; 2250 case AMDGPU::SGPR_NULL: 2251 return true; 2252 default: 2253 return false; 2254 } 2255 } 2256 2257 bool AMDGPUOperand::isInlineValue() const { 2258 return isRegKind() && ::isInlineValue(getReg()); 2259 } 2260 2261 //===----------------------------------------------------------------------===// 2262 // AsmParser 2263 //===----------------------------------------------------------------------===// 2264 2265 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2266 if (Is == IS_VGPR) { 2267 switch (RegWidth) { 2268 default: return -1; 2269 case 32: 2270 return AMDGPU::VGPR_32RegClassID; 2271 case 64: 2272 return AMDGPU::VReg_64RegClassID; 2273 case 96: 2274 return AMDGPU::VReg_96RegClassID; 2275 case 128: 2276 return AMDGPU::VReg_128RegClassID; 2277 case 160: 2278 return AMDGPU::VReg_160RegClassID; 2279 case 192: 2280 return AMDGPU::VReg_192RegClassID; 2281 case 224: 2282 return AMDGPU::VReg_224RegClassID; 2283 case 256: 2284 return AMDGPU::VReg_256RegClassID; 2285 case 512: 2286 return AMDGPU::VReg_512RegClassID; 2287 case 1024: 2288 return AMDGPU::VReg_1024RegClassID; 2289 } 2290 } else if (Is == IS_TTMP) { 2291 switch (RegWidth) { 2292 default: return -1; 2293 case 32: 2294 return AMDGPU::TTMP_32RegClassID; 2295 case 64: 2296 return AMDGPU::TTMP_64RegClassID; 2297 case 128: 2298 return AMDGPU::TTMP_128RegClassID; 2299 case 256: 2300 return AMDGPU::TTMP_256RegClassID; 2301 case 512: 2302 return AMDGPU::TTMP_512RegClassID; 2303 } 2304 } else if (Is == IS_SGPR) { 2305 switch (RegWidth) { 2306 default: return -1; 2307 case 32: 2308 return AMDGPU::SGPR_32RegClassID; 2309 case 64: 2310 return AMDGPU::SGPR_64RegClassID; 2311 case 96: 2312 return AMDGPU::SGPR_96RegClassID; 2313 case 128: 2314 return AMDGPU::SGPR_128RegClassID; 2315 case 160: 2316 return AMDGPU::SGPR_160RegClassID; 2317 case 192: 2318 return AMDGPU::SGPR_192RegClassID; 2319 case 224: 2320 return AMDGPU::SGPR_224RegClassID; 2321 case 256: 2322 return AMDGPU::SGPR_256RegClassID; 2323 case 512: 2324 return AMDGPU::SGPR_512RegClassID; 2325 } 2326 } else if (Is == IS_AGPR) { 2327 switch (RegWidth) { 2328 default: return -1; 2329 case 32: 2330 return AMDGPU::AGPR_32RegClassID; 2331 case 64: 2332 return AMDGPU::AReg_64RegClassID; 2333 case 96: 2334 return AMDGPU::AReg_96RegClassID; 2335 case 128: 2336 return AMDGPU::AReg_128RegClassID; 2337 case 160: 2338 return AMDGPU::AReg_160RegClassID; 2339 case 192: 2340 return AMDGPU::AReg_192RegClassID; 2341 case 224: 2342 return AMDGPU::AReg_224RegClassID; 2343 case 256: 2344 return AMDGPU::AReg_256RegClassID; 2345 case 512: 2346 return AMDGPU::AReg_512RegClassID; 
2347 case 1024: 2348 return AMDGPU::AReg_1024RegClassID; 2349 } 2350 } 2351 return -1; 2352 } 2353 2354 static unsigned getSpecialRegForName(StringRef RegName) { 2355 return StringSwitch<unsigned>(RegName) 2356 .Case("exec", AMDGPU::EXEC) 2357 .Case("vcc", AMDGPU::VCC) 2358 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2359 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2360 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2361 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2362 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2363 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2364 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2365 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2366 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2367 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2368 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2369 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2370 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2371 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2372 .Case("m0", AMDGPU::M0) 2373 .Case("vccz", AMDGPU::SRC_VCCZ) 2374 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2375 .Case("execz", AMDGPU::SRC_EXECZ) 2376 .Case("src_execz", AMDGPU::SRC_EXECZ) 2377 .Case("scc", AMDGPU::SRC_SCC) 2378 .Case("src_scc", AMDGPU::SRC_SCC) 2379 .Case("tba", AMDGPU::TBA) 2380 .Case("tma", AMDGPU::TMA) 2381 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2382 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2383 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2384 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2385 .Case("vcc_lo", AMDGPU::VCC_LO) 2386 .Case("vcc_hi", AMDGPU::VCC_HI) 2387 .Case("exec_lo", AMDGPU::EXEC_LO) 2388 .Case("exec_hi", AMDGPU::EXEC_HI) 2389 .Case("tma_lo", AMDGPU::TMA_LO) 2390 .Case("tma_hi", AMDGPU::TMA_HI) 2391 .Case("tba_lo", AMDGPU::TBA_LO) 2392 .Case("tba_hi", AMDGPU::TBA_HI) 2393 .Case("pc", AMDGPU::PC_REG) 2394 .Case("null", AMDGPU::SGPR_NULL) 2395 .Default(AMDGPU::NoRegister); 2396 } 2397 2398 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2399 SMLoc &EndLoc, bool RestoreOnFailure) { 2400 auto R = parseRegister(); 2401 if (!R) return true; 2402 assert(R->isReg()); 2403 RegNo = R->getReg(); 2404 StartLoc = R->getStartLoc(); 2405 EndLoc = R->getEndLoc(); 2406 return false; 2407 } 2408 2409 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2410 SMLoc &EndLoc) { 2411 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2412 } 2413 2414 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2415 SMLoc &StartLoc, 2416 SMLoc &EndLoc) { 2417 bool Result = 2418 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2419 bool PendingErrors = getParser().hasPendingError(); 2420 getParser().clearPendingErrors(); 2421 if (PendingErrors) 2422 return MatchOperand_ParseFail; 2423 if (Result) 2424 return MatchOperand_NoMatch; 2425 return MatchOperand_Success; 2426 } 2427 2428 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2429 RegisterKind RegKind, unsigned Reg1, 2430 SMLoc Loc) { 2431 switch (RegKind) { 2432 case IS_SPECIAL: 2433 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2434 Reg = AMDGPU::EXEC; 2435 RegWidth = 64; 2436 return true; 2437 } 2438 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2439 Reg = AMDGPU::FLAT_SCR; 2440 RegWidth = 64; 2441 return true; 2442 } 2443 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2444 Reg = AMDGPU::XNACK_MASK; 2445 RegWidth = 64; 
2446 return true; 2447 } 2448 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2449 Reg = AMDGPU::VCC; 2450 RegWidth = 64; 2451 return true; 2452 } 2453 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2454 Reg = AMDGPU::TBA; 2455 RegWidth = 64; 2456 return true; 2457 } 2458 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2459 Reg = AMDGPU::TMA; 2460 RegWidth = 64; 2461 return true; 2462 } 2463 Error(Loc, "register does not fit in the list"); 2464 return false; 2465 case IS_VGPR: 2466 case IS_SGPR: 2467 case IS_AGPR: 2468 case IS_TTMP: 2469 if (Reg1 != Reg + RegWidth / 32) { 2470 Error(Loc, "registers in a list must have consecutive indices"); 2471 return false; 2472 } 2473 RegWidth += 32; 2474 return true; 2475 default: 2476 llvm_unreachable("unexpected register kind"); 2477 } 2478 } 2479 2480 struct RegInfo { 2481 StringLiteral Name; 2482 RegisterKind Kind; 2483 }; 2484 2485 static constexpr RegInfo RegularRegisters[] = { 2486 {{"v"}, IS_VGPR}, 2487 {{"s"}, IS_SGPR}, 2488 {{"ttmp"}, IS_TTMP}, 2489 {{"acc"}, IS_AGPR}, 2490 {{"a"}, IS_AGPR}, 2491 }; 2492 2493 static bool isRegularReg(RegisterKind Kind) { 2494 return Kind == IS_VGPR || 2495 Kind == IS_SGPR || 2496 Kind == IS_TTMP || 2497 Kind == IS_AGPR; 2498 } 2499 2500 static const RegInfo* getRegularRegInfo(StringRef Str) { 2501 for (const RegInfo &Reg : RegularRegisters) 2502 if (Str.startswith(Reg.Name)) 2503 return &Reg; 2504 return nullptr; 2505 } 2506 2507 static bool getRegNum(StringRef Str, unsigned& Num) { 2508 return !Str.getAsInteger(10, Num); 2509 } 2510 2511 bool 2512 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2513 const AsmToken &NextToken) const { 2514 2515 // A list of consecutive registers: [s0,s1,s2,s3] 2516 if (Token.is(AsmToken::LBrac)) 2517 return true; 2518 2519 if (!Token.is(AsmToken::Identifier)) 2520 return false; 2521 2522 // A single register like s0 or a range of registers like s[0:1] 2523 2524 StringRef Str = Token.getString(); 2525 const RegInfo *Reg = getRegularRegInfo(Str); 2526 if (Reg) { 2527 StringRef RegName = Reg->Name; 2528 StringRef RegSuffix = Str.substr(RegName.size()); 2529 if (!RegSuffix.empty()) { 2530 unsigned Num; 2531 // A single register with an index: rXX 2532 if (getRegNum(RegSuffix, Num)) 2533 return true; 2534 } else { 2535 // A range of registers: r[XX:YY]. 2536 if (NextToken.is(AsmToken::LBrac)) 2537 return true; 2538 } 2539 } 2540 2541 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2542 } 2543 2544 bool 2545 AMDGPUAsmParser::isRegister() 2546 { 2547 return isRegister(getToken(), peekToken()); 2548 } 2549 2550 unsigned 2551 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2552 unsigned RegNum, 2553 unsigned RegWidth, 2554 SMLoc Loc) { 2555 2556 assert(isRegularReg(RegKind)); 2557 2558 unsigned AlignSize = 1; 2559 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2560 // SGPR and TTMP registers must be aligned. 2561 // Max required alignment is 4 dwords. 
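    // For example, a 64-bit range such as s[0:1] must start at an even SGPR
    // and a 128-bit range such as s[4:7] at a multiple of 4, so s[1:2] and
    // s[3:6] are rejected below as misaligned.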
2562 AlignSize = std::min(RegWidth / 32, 4u); 2563 } 2564 2565 if (RegNum % AlignSize != 0) { 2566 Error(Loc, "invalid register alignment"); 2567 return AMDGPU::NoRegister; 2568 } 2569 2570 unsigned RegIdx = RegNum / AlignSize; 2571 int RCID = getRegClass(RegKind, RegWidth); 2572 if (RCID == -1) { 2573 Error(Loc, "invalid or unsupported register size"); 2574 return AMDGPU::NoRegister; 2575 } 2576 2577 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2578 const MCRegisterClass RC = TRI->getRegClass(RCID); 2579 if (RegIdx >= RC.getNumRegs()) { 2580 Error(Loc, "register index is out of range"); 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 return RC.getRegister(RegIdx); 2585 } 2586 2587 bool AMDGPUAsmParser::ParseRegRange(unsigned &Num, unsigned &RegWidth) { 2588 int64_t RegLo, RegHi; 2589 if (!skipToken(AsmToken::LBrac, "missing register index")) 2590 return false; 2591 2592 SMLoc FirstIdxLoc = getLoc(); 2593 SMLoc SecondIdxLoc; 2594 2595 if (!parseExpr(RegLo)) 2596 return false; 2597 2598 if (trySkipToken(AsmToken::Colon)) { 2599 SecondIdxLoc = getLoc(); 2600 if (!parseExpr(RegHi)) 2601 return false; 2602 } else { 2603 RegHi = RegLo; 2604 } 2605 2606 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2607 return false; 2608 2609 if (!isUInt<32>(RegLo)) { 2610 Error(FirstIdxLoc, "invalid register index"); 2611 return false; 2612 } 2613 2614 if (!isUInt<32>(RegHi)) { 2615 Error(SecondIdxLoc, "invalid register index"); 2616 return false; 2617 } 2618 2619 if (RegLo > RegHi) { 2620 Error(FirstIdxLoc, "first register index should not exceed second index"); 2621 return false; 2622 } 2623 2624 Num = static_cast<unsigned>(RegLo); 2625 RegWidth = 32 * ((RegHi - RegLo) + 1); 2626 return true; 2627 } 2628 2629 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2630 unsigned &RegNum, unsigned &RegWidth, 2631 SmallVectorImpl<AsmToken> &Tokens) { 2632 assert(isToken(AsmToken::Identifier)); 2633 unsigned Reg = getSpecialRegForName(getTokenStr()); 2634 if (Reg) { 2635 RegNum = 0; 2636 RegWidth = 32; 2637 RegKind = IS_SPECIAL; 2638 Tokens.push_back(getToken()); 2639 lex(); // skip register name 2640 } 2641 return Reg; 2642 } 2643 2644 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2645 unsigned &RegNum, unsigned &RegWidth, 2646 SmallVectorImpl<AsmToken> &Tokens) { 2647 assert(isToken(AsmToken::Identifier)); 2648 StringRef RegName = getTokenStr(); 2649 auto Loc = getLoc(); 2650 2651 const RegInfo *RI = getRegularRegInfo(RegName); 2652 if (!RI) { 2653 Error(Loc, "invalid register name"); 2654 return AMDGPU::NoRegister; 2655 } 2656 2657 Tokens.push_back(getToken()); 2658 lex(); // skip register name 2659 2660 RegKind = RI->Kind; 2661 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2662 if (!RegSuffix.empty()) { 2663 // Single 32-bit register: vXX. 2664 if (!getRegNum(RegSuffix, RegNum)) { 2665 Error(Loc, "invalid register index"); 2666 return AMDGPU::NoRegister; 2667 } 2668 RegWidth = 32; 2669 } else { 2670 // Range of registers: v[XX:YY]. ":YY" is optional. 
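    // For example, v[8:11] yields RegNum = 8 and RegWidth = 128, while v[8]
    // yields RegNum = 8 and RegWidth = 32.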
2671 if (!ParseRegRange(RegNum, RegWidth)) 2672 return AMDGPU::NoRegister; 2673 } 2674 2675 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2676 } 2677 2678 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2679 unsigned &RegWidth, 2680 SmallVectorImpl<AsmToken> &Tokens) { 2681 unsigned Reg = AMDGPU::NoRegister; 2682 auto ListLoc = getLoc(); 2683 2684 if (!skipToken(AsmToken::LBrac, 2685 "expected a register or a list of registers")) { 2686 return AMDGPU::NoRegister; 2687 } 2688 2689 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2690 2691 auto Loc = getLoc(); 2692 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2693 return AMDGPU::NoRegister; 2694 if (RegWidth != 32) { 2695 Error(Loc, "expected a single 32-bit register"); 2696 return AMDGPU::NoRegister; 2697 } 2698 2699 for (; trySkipToken(AsmToken::Comma); ) { 2700 RegisterKind NextRegKind; 2701 unsigned NextReg, NextRegNum, NextRegWidth; 2702 Loc = getLoc(); 2703 2704 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2705 NextRegNum, NextRegWidth, 2706 Tokens)) { 2707 return AMDGPU::NoRegister; 2708 } 2709 if (NextRegWidth != 32) { 2710 Error(Loc, "expected a single 32-bit register"); 2711 return AMDGPU::NoRegister; 2712 } 2713 if (NextRegKind != RegKind) { 2714 Error(Loc, "registers in a list must be of the same kind"); 2715 return AMDGPU::NoRegister; 2716 } 2717 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2718 return AMDGPU::NoRegister; 2719 } 2720 2721 if (!skipToken(AsmToken::RBrac, 2722 "expected a comma or a closing square bracket")) { 2723 return AMDGPU::NoRegister; 2724 } 2725 2726 if (isRegularReg(RegKind)) 2727 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2728 2729 return Reg; 2730 } 2731 2732 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2733 unsigned &RegNum, unsigned &RegWidth, 2734 SmallVectorImpl<AsmToken> &Tokens) { 2735 auto Loc = getLoc(); 2736 Reg = AMDGPU::NoRegister; 2737 2738 if (isToken(AsmToken::Identifier)) { 2739 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2740 if (Reg == AMDGPU::NoRegister) 2741 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2742 } else { 2743 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2744 } 2745 2746 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2747 if (Reg == AMDGPU::NoRegister) { 2748 assert(Parser.hasPendingError()); 2749 return false; 2750 } 2751 2752 if (!subtargetHasRegister(*TRI, Reg)) { 2753 if (Reg == AMDGPU::SGPR_NULL) { 2754 Error(Loc, "'null' operand is not supported on this GPU"); 2755 } else { 2756 Error(Loc, "register not available on this GPU"); 2757 } 2758 return false; 2759 } 2760 2761 return true; 2762 } 2763 2764 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2765 unsigned &RegNum, unsigned &RegWidth, 2766 bool RestoreOnFailure /*=false*/) { 2767 Reg = AMDGPU::NoRegister; 2768 2769 SmallVector<AsmToken, 1> Tokens; 2770 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2771 if (RestoreOnFailure) { 2772 while (!Tokens.empty()) { 2773 getLexer().UnLex(Tokens.pop_back_val()); 2774 } 2775 } 2776 return true; 2777 } 2778 return false; 2779 } 2780 2781 Optional<StringRef> 2782 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2783 switch (RegKind) { 2784 case IS_VGPR: 2785 return StringRef(".amdgcn.next_free_vgpr"); 2786 case IS_SGPR: 2787 return StringRef(".amdgcn.next_free_sgpr"); 2788 default: 2789 return None; 2790 } 2791 } 2792 2793 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2794 auto SymbolName = getGprCountSymbolName(RegKind); 2795 assert(SymbolName && "initializing invalid register kind"); 2796 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2797 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2798 } 2799 2800 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2801 unsigned DwordRegIndex, 2802 unsigned RegWidth) { 2803 // Symbols are only defined for GCN targets 2804 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2805 return true; 2806 2807 auto SymbolName = getGprCountSymbolName(RegKind); 2808 if (!SymbolName) 2809 return true; 2810 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2811 2812 int64_t NewMax = DwordRegIndex + divideCeil(RegWidth, 32) - 1; 2813 int64_t OldCount; 2814 2815 if (!Sym->isVariable()) 2816 return !Error(getLoc(), 2817 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2818 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2819 return !Error( 2820 getLoc(), 2821 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2822 2823 if (OldCount <= NewMax) 2824 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2825 2826 return true; 2827 } 2828 2829 std::unique_ptr<AMDGPUOperand> 2830 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2831 const auto &Tok = getToken(); 2832 SMLoc StartLoc = Tok.getLoc(); 2833 SMLoc EndLoc = Tok.getEndLoc(); 2834 RegisterKind RegKind; 2835 unsigned Reg, RegNum, RegWidth; 2836 2837 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2838 return nullptr; 2839 } 2840 if (isHsaAbiVersion3AndAbove(&getSTI())) { 2841 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2842 return nullptr; 2843 } else 2844 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2845 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2846 } 2847 2848 OperandMatchResultTy 2849 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2850 // TODO: add syntactic sugar for 1/(2*PI) 2851 2852 assert(!isRegister()); 2853 assert(!isModifier()); 2854 2855 const auto& Tok = getToken(); 2856 const auto& NextTok = peekToken(); 2857 bool IsReal = Tok.is(AsmToken::Real); 2858 SMLoc S = getLoc(); 2859 bool Negate = false; 2860 2861 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2862 lex(); 2863 IsReal = true; 2864 Negate = true; 2865 } 2866 2867 if (IsReal) { 2868 // Floating-point expressions are not supported. 2869 // Can only allow floating-point literals with an 2870 // optional sign. 2871 2872 StringRef Num = getTokenStr(); 2873 lex(); 2874 2875 APFloat RealVal(APFloat::IEEEdouble()); 2876 auto roundMode = APFloat::rmNearestTiesToEven; 2877 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2878 return MatchOperand_ParseFail; 2879 } 2880 if (Negate) 2881 RealVal.changeSign(); 2882 2883 Operands.push_back( 2884 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2885 AMDGPUOperand::ImmTyNone, true)); 2886 2887 return MatchOperand_Success; 2888 2889 } else { 2890 int64_t IntVal; 2891 const MCExpr *Expr; 2892 SMLoc S = getLoc(); 2893 2894 if (HasSP3AbsModifier) { 2895 // This is a workaround for handling expressions 2896 // as arguments of SP3 'abs' modifier, for example: 2897 // |1.0| 2898 // |-1| 2899 // |1+x| 2900 // This syntax is not compatible with syntax of standard 2901 // MC expressions (due to the trailing '|'). 
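      // parsePrimaryExpr() is used here because it stops before the trailing
      // '|'; a full parseExpression() would try to consume '|' as a binary
      // operator.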
2902 SMLoc EndLoc; 2903 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2904 return MatchOperand_ParseFail; 2905 } else { 2906 if (Parser.parseExpression(Expr)) 2907 return MatchOperand_ParseFail; 2908 } 2909 2910 if (Expr->evaluateAsAbsolute(IntVal)) { 2911 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2912 } else { 2913 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2914 } 2915 2916 return MatchOperand_Success; 2917 } 2918 2919 return MatchOperand_NoMatch; 2920 } 2921 2922 OperandMatchResultTy 2923 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2924 if (!isRegister()) 2925 return MatchOperand_NoMatch; 2926 2927 if (auto R = parseRegister()) { 2928 assert(R->isReg()); 2929 Operands.push_back(std::move(R)); 2930 return MatchOperand_Success; 2931 } 2932 return MatchOperand_ParseFail; 2933 } 2934 2935 OperandMatchResultTy 2936 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2937 auto res = parseReg(Operands); 2938 if (res != MatchOperand_NoMatch) { 2939 return res; 2940 } else if (isModifier()) { 2941 return MatchOperand_NoMatch; 2942 } else { 2943 return parseImm(Operands, HasSP3AbsMod); 2944 } 2945 } 2946 2947 bool 2948 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2949 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2950 const auto &str = Token.getString(); 2951 return str == "abs" || str == "neg" || str == "sext"; 2952 } 2953 return false; 2954 } 2955 2956 bool 2957 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2958 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2959 } 2960 2961 bool 2962 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2963 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2964 } 2965 2966 bool 2967 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2968 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2969 } 2970 2971 // Check if this is an operand modifier or an opcode modifier 2972 // which may look like an expression but it is not. We should 2973 // avoid parsing these modifiers as expressions. Currently 2974 // recognized sequences are: 2975 // |...| 2976 // abs(...) 2977 // neg(...) 2978 // sext(...) 2979 // -reg 2980 // -|...| 2981 // -abs(...) 2982 // name:... 2983 // Note that simple opcode modifiers like 'gds' may be parsed as 2984 // expressions; this is a special case. See getExpressionAsToken. 2985 // 2986 bool 2987 AMDGPUAsmParser::isModifier() { 2988 2989 AsmToken Tok = getToken(); 2990 AsmToken NextToken[2]; 2991 peekTokens(NextToken); 2992 2993 return isOperandModifier(Tok, NextToken[0]) || 2994 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2995 isOpcodeModifierWithVal(Tok, NextToken[0]); 2996 } 2997 2998 // Check if the current token is an SP3 'neg' modifier. 2999 // Currently this modifier is allowed in the following context: 3000 // 3001 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 3002 // 2. Before an 'abs' modifier: -abs(...) 3003 // 3. Before an SP3 'abs' modifier: -|...| 3004 // 3005 // In all other cases "-" is handled as a part 3006 // of an expression that follows the sign. 
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meanings
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with a preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 3080 } 3081 3082 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 3083 return MatchOperand_ParseFail; 3084 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3085 return MatchOperand_ParseFail; 3086 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3087 return MatchOperand_ParseFail; 3088 3089 AMDGPUOperand::Modifiers Mods; 3090 Mods.Abs = Abs || SP3Abs; 3091 Mods.Neg = Neg || SP3Neg; 3092 3093 if (Mods.hasFPModifiers()) { 3094 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3095 if (Op.isExpr()) { 3096 Error(Op.getStartLoc(), "expected an absolute expression"); 3097 return MatchOperand_ParseFail; 3098 } 3099 Op.setModifiers(Mods); 3100 } 3101 return MatchOperand_Success; 3102 } 3103 3104 OperandMatchResultTy 3105 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3106 bool AllowImm) { 3107 bool Sext = trySkipId("sext"); 3108 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3109 return MatchOperand_ParseFail; 3110 3111 OperandMatchResultTy Res; 3112 if (AllowImm) { 3113 Res = parseRegOrImm(Operands); 3114 } else { 3115 Res = parseReg(Operands); 3116 } 3117 if (Res != MatchOperand_Success) { 3118 return Sext? MatchOperand_ParseFail : Res; 3119 } 3120 3121 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3122 return MatchOperand_ParseFail; 3123 3124 AMDGPUOperand::Modifiers Mods; 3125 Mods.Sext = Sext; 3126 3127 if (Mods.hasIntModifiers()) { 3128 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3129 if (Op.isExpr()) { 3130 Error(Op.getStartLoc(), "expected an absolute expression"); 3131 return MatchOperand_ParseFail; 3132 } 3133 Op.setModifiers(Mods); 3134 } 3135 3136 return MatchOperand_Success; 3137 } 3138 3139 OperandMatchResultTy 3140 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3141 return parseRegOrImmWithFPInputMods(Operands, false); 3142 } 3143 3144 OperandMatchResultTy 3145 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3146 return parseRegOrImmWithIntInputMods(Operands, false); 3147 } 3148 3149 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3150 auto Loc = getLoc(); 3151 if (trySkipId("off")) { 3152 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3153 AMDGPUOperand::ImmTyOff, false)); 3154 return MatchOperand_Success; 3155 } 3156 3157 if (!isRegister()) 3158 return MatchOperand_NoMatch; 3159 3160 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3161 if (Reg) { 3162 Operands.push_back(std::move(Reg)); 3163 return MatchOperand_Success; 3164 } 3165 3166 return MatchOperand_ParseFail; 3167 3168 } 3169 3170 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3171 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3172 3173 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3174 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3175 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3176 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3177 return Match_InvalidOperand; 3178 3179 if ((TSFlags & SIInstrFlags::VOP3) && 3180 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3181 getForcedEncodingSize() != 64) 3182 return Match_PreferE32; 3183 3184 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3185 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3186 // v_mac_f32/16 allow only dst_sel == DWORD; 3187 auto OpNum = 3188 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3189 const auto &Op = Inst.getOperand(OpNum); 3190 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3191 return Match_InvalidOperand; 3192 } 3193 } 3194 3195 return Match_Success; 3196 } 3197 3198 static ArrayRef<unsigned> getAllVariants() { 3199 static const unsigned Variants[] = { 3200 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3201 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3202 }; 3203 3204 return makeArrayRef(Variants); 3205 } 3206 3207 // What asm variants we should check 3208 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3209 if (getForcedEncodingSize() == 32) { 3210 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3211 return makeArrayRef(Variants); 3212 } 3213 3214 if (isForcedVOP3()) { 3215 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3216 return makeArrayRef(Variants); 3217 } 3218 3219 if (isForcedSDWA()) { 3220 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3221 AMDGPUAsmVariants::SDWA9}; 3222 return makeArrayRef(Variants); 3223 } 3224 3225 if (isForcedDPP()) { 3226 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3227 return makeArrayRef(Variants); 3228 } 3229 3230 return getAllVariants(); 3231 } 3232 3233 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3234 if (getForcedEncodingSize() == 32) 3235 return "e32"; 3236 3237 if (isForcedVOP3()) 3238 return "e64"; 3239 3240 if (isForcedSDWA()) 3241 return "sdwa"; 3242 3243 if (isForcedDPP()) 3244 return "dpp"; 3245 3246 return ""; 3247 } 3248 3249 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3250 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3251 const unsigned Num = Desc.getNumImplicitUses(); 3252 for (unsigned i = 0; i < Num; ++i) { 3253 unsigned Reg = Desc.ImplicitUses[i]; 3254 switch (Reg) { 3255 case AMDGPU::FLAT_SCR: 3256 case AMDGPU::VCC: 3257 case AMDGPU::VCC_LO: 3258 case AMDGPU::VCC_HI: 3259 case AMDGPU::M0: 3260 return Reg; 3261 default: 3262 break; 3263 } 3264 } 3265 return AMDGPU::NoRegister; 3266 } 3267 3268 // NB: This code is correct only when used to check constant 3269 // bus limitations because GFX7 support no f16 inline constants. 3270 // Note that there are no cases when a GFX7 opcode violates 3271 // constant bus limitations due to the use of an f16 constant. 
3272 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3273 unsigned OpIdx) const { 3274 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3275 3276 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3277 return false; 3278 } 3279 3280 const MCOperand &MO = Inst.getOperand(OpIdx); 3281 3282 int64_t Val = MO.getImm(); 3283 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3284 3285 switch (OpSize) { // expected operand size 3286 case 8: 3287 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3288 case 4: 3289 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3290 case 2: { 3291 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3292 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3293 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3294 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3295 return AMDGPU::isInlinableIntLiteral(Val); 3296 3297 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3298 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3299 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3300 return AMDGPU::isInlinableIntLiteralV216(Val); 3301 3302 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3303 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3304 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3305 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3306 3307 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3308 } 3309 default: 3310 llvm_unreachable("invalid operand size"); 3311 } 3312 } 3313 3314 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3315 if (!isGFX10Plus()) 3316 return 1; 3317 3318 switch (Opcode) { 3319 // 64-bit shift instructions can use only one scalar value input 3320 case AMDGPU::V_LSHLREV_B64_e64: 3321 case AMDGPU::V_LSHLREV_B64_gfx10: 3322 case AMDGPU::V_LSHRREV_B64_e64: 3323 case AMDGPU::V_LSHRREV_B64_gfx10: 3324 case AMDGPU::V_ASHRREV_I64_e64: 3325 case AMDGPU::V_ASHRREV_I64_gfx10: 3326 case AMDGPU::V_LSHL_B64_e64: 3327 case AMDGPU::V_LSHR_B64_e64: 3328 case AMDGPU::V_ASHR_I64_e64: 3329 return 1; 3330 default: 3331 return 2; 3332 } 3333 } 3334 3335 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3336 const MCOperand &MO = Inst.getOperand(OpIdx); 3337 if (MO.isImm()) { 3338 return !isInlineConstant(Inst, OpIdx); 3339 } else if (MO.isReg()) { 3340 auto Reg = MO.getReg(); 3341 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3342 auto PReg = mc2PseudoReg(Reg); 3343 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3344 } else { 3345 return true; 3346 } 3347 } 3348 3349 bool 3350 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3351 const OperandVector &Operands) { 3352 const unsigned Opcode = Inst.getOpcode(); 3353 const MCInstrDesc &Desc = MII.get(Opcode); 3354 unsigned LastSGPR = AMDGPU::NoRegister; 3355 unsigned ConstantBusUseCount = 0; 3356 unsigned NumLiterals = 0; 3357 unsigned LiteralSize; 3358 3359 if (Desc.TSFlags & 3360 (SIInstrFlags::VOPC | 3361 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3362 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3363 SIInstrFlags::SDWA)) { 3364 // Check special imm operands (used by madmk, etc) 3365 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3366 ++NumLiterals; 3367 LiteralSize = 4; 3368 } 3369 3370 SmallDenseSet<unsigned> SGPRsUsed; 3371 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3372 if (SGPRUsed != AMDGPU::NoRegister) { 3373 SGPRsUsed.insert(SGPRUsed); 3374 ++ConstantBusUseCount; 3375 } 3376 3377 
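    // For example, on targets where getConstantBusLimit() returns 1,
    // "v_add_f32_e64 v0, s0, s1" reads two distinct SGPRs over the constant
    // bus and is rejected below, while "v_add_f32_e64 v0, s0, s0" counts s0
    // only once and is accepted.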
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersection, like these:
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction.
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated in a previous step.
          // See validateVOPLiteral.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3435 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3436 return false; 3437 } 3438 3439 bool 3440 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3441 const OperandVector &Operands) { 3442 const unsigned Opcode = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opcode); 3444 3445 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3446 if (DstIdx == -1 || 3447 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3448 return true; 3449 } 3450 3451 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3452 3453 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3454 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3455 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3456 3457 assert(DstIdx != -1); 3458 const MCOperand &Dst = Inst.getOperand(DstIdx); 3459 assert(Dst.isReg()); 3460 3461 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3462 3463 for (int SrcIdx : SrcIndices) { 3464 if (SrcIdx == -1) break; 3465 const MCOperand &Src = Inst.getOperand(SrcIdx); 3466 if (Src.isReg()) { 3467 if (TRI->regsOverlap(Dst.getReg(), Src.getReg())) { 3468 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3469 Error(getRegLoc(SrcReg, Operands), 3470 "destination must be different than all sources"); 3471 return false; 3472 } 3473 } 3474 } 3475 3476 return true; 3477 } 3478 3479 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3480 3481 const unsigned Opc = Inst.getOpcode(); 3482 const MCInstrDesc &Desc = MII.get(Opc); 3483 3484 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3485 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3486 assert(ClampIdx != -1); 3487 return Inst.getOperand(ClampIdx).getImm() == 0; 3488 } 3489 3490 return true; 3491 } 3492 3493 Optional<StringRef> AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3494 3495 const unsigned Opc = Inst.getOpcode(); 3496 const MCInstrDesc &Desc = MII.get(Opc); 3497 3498 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3499 return None; 3500 3501 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3502 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3503 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3504 3505 assert(VDataIdx != -1); 3506 3507 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3508 return None; 3509 3510 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3511 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3512 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3513 if (DMask == 0) 3514 DMask = 1; 3515 3516 bool isPackedD16 = false; 3517 unsigned DataSize = 3518 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3519 if (hasPackedD16()) { 3520 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3521 isPackedD16 = D16Idx >= 0; 3522 if (isPackedD16 && Inst.getOperand(D16Idx).getImm()) 3523 DataSize = (DataSize + 1) / 2; 3524 } 3525 3526 if ((VDataSize / 4) == DataSize + TFESize) 3527 return None; 3528 3529 return StringRef(isPackedD16 3530 ? 
"image data size does not match dmask, d16 and tfe" 3531 : "image data size does not match dmask and tfe"); 3532 } 3533 3534 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3535 const unsigned Opc = Inst.getOpcode(); 3536 const MCInstrDesc &Desc = MII.get(Opc); 3537 3538 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3539 return true; 3540 3541 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3542 3543 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3544 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3545 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3546 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3547 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3548 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3549 3550 assert(VAddr0Idx != -1); 3551 assert(SrsrcIdx != -1); 3552 assert(SrsrcIdx > VAddr0Idx); 3553 3554 if (DimIdx == -1) 3555 return true; // intersect_ray 3556 3557 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3558 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3559 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3560 unsigned ActualAddrSize = 3561 IsNSA ? SrsrcIdx - VAddr0Idx 3562 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3563 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3564 3565 unsigned ExpectedAddrSize = 3566 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3567 3568 if (!IsNSA) { 3569 if (ExpectedAddrSize > 8) 3570 ExpectedAddrSize = 16; 3571 3572 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3573 // This provides backward compatibility for assembly created 3574 // before 160b/192b/224b types were directly supported. 3575 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3576 return true; 3577 } 3578 3579 return ActualAddrSize == ExpectedAddrSize; 3580 } 3581 3582 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3583 3584 const unsigned Opc = Inst.getOpcode(); 3585 const MCInstrDesc &Desc = MII.get(Opc); 3586 3587 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3588 return true; 3589 if (!Desc.mayLoad() || !Desc.mayStore()) 3590 return true; // Not atomic 3591 3592 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3593 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3594 3595 // This is an incomplete check because image_atomic_cmpswap 3596 // may only use 0x3 and 0xf while other atomic operations 3597 // may use 0x1 and 0x3. However these limitations are 3598 // verified when we check that dmask matches dst size. 3599 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3600 } 3601 3602 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3603 3604 const unsigned Opc = Inst.getOpcode(); 3605 const MCInstrDesc &Desc = MII.get(Opc); 3606 3607 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3608 return true; 3609 3610 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3611 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3612 3613 // GATHER4 instructions use dmask in a different fashion compared to 3614 // other MIMG instructions. The only useful DMASK values are 3615 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3616 // (red,red,red,red) etc.) The ISA document doesn't mention 3617 // this. 
3618 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3619 } 3620 3621 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3622 const unsigned Opc = Inst.getOpcode(); 3623 const MCInstrDesc &Desc = MII.get(Opc); 3624 3625 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3626 return true; 3627 3628 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3629 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3630 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3631 3632 if (!BaseOpcode->MSAA) 3633 return true; 3634 3635 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3636 assert(DimIdx != -1); 3637 3638 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3639 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3640 3641 return DimInfo->MSAA; 3642 } 3643 3644 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3645 { 3646 switch (Opcode) { 3647 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3648 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3649 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3650 return true; 3651 default: 3652 return false; 3653 } 3654 } 3655 3656 // movrels* opcodes should only allow VGPRS as src0. 3657 // This is specified in .td description for vop1/vop3, 3658 // but sdwa is handled differently. See isSDWAOperand. 3659 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3660 const OperandVector &Operands) { 3661 3662 const unsigned Opc = Inst.getOpcode(); 3663 const MCInstrDesc &Desc = MII.get(Opc); 3664 3665 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3666 return true; 3667 3668 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3669 assert(Src0Idx != -1); 3670 3671 SMLoc ErrLoc; 3672 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3673 if (Src0.isReg()) { 3674 auto Reg = mc2PseudoReg(Src0.getReg()); 3675 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3676 if (!isSGPR(Reg, TRI)) 3677 return true; 3678 ErrLoc = getRegLoc(Reg, Operands); 3679 } else { 3680 ErrLoc = getConstLoc(Operands); 3681 } 3682 3683 Error(ErrLoc, "source operand must be a VGPR"); 3684 return false; 3685 } 3686 3687 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3688 const OperandVector &Operands) { 3689 3690 const unsigned Opc = Inst.getOpcode(); 3691 3692 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3693 return true; 3694 3695 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3696 assert(Src0Idx != -1); 3697 3698 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3699 if (!Src0.isReg()) 3700 return true; 3701 3702 auto Reg = mc2PseudoReg(Src0.getReg()); 3703 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3704 if (!isGFX90A() && isSGPR(Reg, TRI)) { 3705 Error(getRegLoc(Reg, Operands), 3706 "source operand must be either a VGPR or an inline constant"); 3707 return false; 3708 } 3709 3710 return true; 3711 } 3712 3713 bool AMDGPUAsmParser::validateMFMA(const MCInst &Inst, 3714 const OperandVector &Operands) { 3715 const unsigned Opc = Inst.getOpcode(); 3716 const MCInstrDesc &Desc = MII.get(Opc); 3717 3718 if ((Desc.TSFlags & SIInstrFlags::IsMAI) == 0) 3719 return true; 3720 3721 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2); 3722 if (Src2Idx == -1) 3723 return true; 3724 3725 const MCOperand &Src2 = Inst.getOperand(Src2Idx); 3726 if (!Src2.isReg()) 3727 return true; 3728 3729 MCRegister Src2Reg = Src2.getReg(); 3730 MCRegister DstReg = Inst.getOperand(0).getReg(); 3731 if (Src2Reg == DstReg) 3732 return 
true; 3733 3734 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3735 if (TRI->getRegClass(Desc.OpInfo[0].RegClass).getSizeInBits() <= 128) 3736 return true; 3737 3738 if (TRI->regsOverlap(Src2Reg, DstReg)) { 3739 Error(getRegLoc(mc2PseudoReg(Src2Reg), Operands), 3740 "source 2 operand must not partially overlap with dst"); 3741 return false; 3742 } 3743 3744 return true; 3745 } 3746 3747 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3748 switch (Inst.getOpcode()) { 3749 default: 3750 return true; 3751 case V_DIV_SCALE_F32_gfx6_gfx7: 3752 case V_DIV_SCALE_F32_vi: 3753 case V_DIV_SCALE_F32_gfx10: 3754 case V_DIV_SCALE_F64_gfx6_gfx7: 3755 case V_DIV_SCALE_F64_vi: 3756 case V_DIV_SCALE_F64_gfx10: 3757 break; 3758 } 3759 3760 // TODO: Check that src0 = src1 or src2. 3761 3762 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3763 AMDGPU::OpName::src1_modifiers, 3764 AMDGPU::OpName::src2_modifiers}) { 3765 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3766 .getImm() & 3767 SISrcMods::ABS) { 3768 return false; 3769 } 3770 } 3771 3772 return true; 3773 } 3774 3775 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3776 3777 const unsigned Opc = Inst.getOpcode(); 3778 const MCInstrDesc &Desc = MII.get(Opc); 3779 3780 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3781 return true; 3782 3783 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3784 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3785 if (isCI() || isSI()) 3786 return false; 3787 } 3788 3789 return true; 3790 } 3791 3792 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3793 const unsigned Opc = Inst.getOpcode(); 3794 const MCInstrDesc &Desc = MII.get(Opc); 3795 3796 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3797 return true; 3798 3799 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3800 if (DimIdx < 0) 3801 return true; 3802 3803 long Imm = Inst.getOperand(DimIdx).getImm(); 3804 if (Imm < 0 || Imm >= 8) 3805 return false; 3806 3807 return true; 3808 } 3809 3810 static bool IsRevOpcode(const unsigned Opcode) 3811 { 3812 switch (Opcode) { 3813 case AMDGPU::V_SUBREV_F32_e32: 3814 case AMDGPU::V_SUBREV_F32_e64: 3815 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3816 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3817 case AMDGPU::V_SUBREV_F32_e32_vi: 3818 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3819 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3820 case AMDGPU::V_SUBREV_F32_e64_vi: 3821 3822 case AMDGPU::V_SUBREV_CO_U32_e32: 3823 case AMDGPU::V_SUBREV_CO_U32_e64: 3824 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3825 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3826 3827 case AMDGPU::V_SUBBREV_U32_e32: 3828 case AMDGPU::V_SUBBREV_U32_e64: 3829 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3830 case AMDGPU::V_SUBBREV_U32_e32_vi: 3831 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3832 case AMDGPU::V_SUBBREV_U32_e64_vi: 3833 3834 case AMDGPU::V_SUBREV_U32_e32: 3835 case AMDGPU::V_SUBREV_U32_e64: 3836 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3837 case AMDGPU::V_SUBREV_U32_e32_vi: 3838 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3839 case AMDGPU::V_SUBREV_U32_e64_vi: 3840 3841 case AMDGPU::V_SUBREV_F16_e32: 3842 case AMDGPU::V_SUBREV_F16_e64: 3843 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3844 case AMDGPU::V_SUBREV_F16_e32_vi: 3845 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3846 case AMDGPU::V_SUBREV_F16_e64_vi: 3847 3848 case AMDGPU::V_SUBREV_U16_e32: 3849 case AMDGPU::V_SUBREV_U16_e64: 3850 case AMDGPU::V_SUBREV_U16_e32_vi: 3851 case AMDGPU::V_SUBREV_U16_e64_vi: 3852 3853 case
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3854 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3855 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3856 3857 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3858 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3859 3860 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3861 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3862 3863 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3864 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3865 3866 case AMDGPU::V_LSHRREV_B32_e32: 3867 case AMDGPU::V_LSHRREV_B32_e64: 3868 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3869 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3870 case AMDGPU::V_LSHRREV_B32_e32_vi: 3871 case AMDGPU::V_LSHRREV_B32_e64_vi: 3872 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3873 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3874 3875 case AMDGPU::V_ASHRREV_I32_e32: 3876 case AMDGPU::V_ASHRREV_I32_e64: 3877 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3878 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3879 case AMDGPU::V_ASHRREV_I32_e32_vi: 3880 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3881 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3882 case AMDGPU::V_ASHRREV_I32_e64_vi: 3883 3884 case AMDGPU::V_LSHLREV_B32_e32: 3885 case AMDGPU::V_LSHLREV_B32_e64: 3886 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3887 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3888 case AMDGPU::V_LSHLREV_B32_e32_vi: 3889 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3890 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3891 case AMDGPU::V_LSHLREV_B32_e64_vi: 3892 3893 case AMDGPU::V_LSHLREV_B16_e32: 3894 case AMDGPU::V_LSHLREV_B16_e64: 3895 case AMDGPU::V_LSHLREV_B16_e32_vi: 3896 case AMDGPU::V_LSHLREV_B16_e64_vi: 3897 case AMDGPU::V_LSHLREV_B16_gfx10: 3898 3899 case AMDGPU::V_LSHRREV_B16_e32: 3900 case AMDGPU::V_LSHRREV_B16_e64: 3901 case AMDGPU::V_LSHRREV_B16_e32_vi: 3902 case AMDGPU::V_LSHRREV_B16_e64_vi: 3903 case AMDGPU::V_LSHRREV_B16_gfx10: 3904 3905 case AMDGPU::V_ASHRREV_I16_e32: 3906 case AMDGPU::V_ASHRREV_I16_e64: 3907 case AMDGPU::V_ASHRREV_I16_e32_vi: 3908 case AMDGPU::V_ASHRREV_I16_e64_vi: 3909 case AMDGPU::V_ASHRREV_I16_gfx10: 3910 3911 case AMDGPU::V_LSHLREV_B64_e64: 3912 case AMDGPU::V_LSHLREV_B64_gfx10: 3913 case AMDGPU::V_LSHLREV_B64_vi: 3914 3915 case AMDGPU::V_LSHRREV_B64_e64: 3916 case AMDGPU::V_LSHRREV_B64_gfx10: 3917 case AMDGPU::V_LSHRREV_B64_vi: 3918 3919 case AMDGPU::V_ASHRREV_I64_e64: 3920 case AMDGPU::V_ASHRREV_I64_gfx10: 3921 case AMDGPU::V_ASHRREV_I64_vi: 3922 3923 case AMDGPU::V_PK_LSHLREV_B16: 3924 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3925 case AMDGPU::V_PK_LSHLREV_B16_vi: 3926 3927 case AMDGPU::V_PK_LSHRREV_B16: 3928 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3929 case AMDGPU::V_PK_LSHRREV_B16_vi: 3930 case AMDGPU::V_PK_ASHRREV_I16: 3931 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3932 case AMDGPU::V_PK_ASHRREV_I16_vi: 3933 return true; 3934 default: 3935 return false; 3936 } 3937 } 3938 3939 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3940 3941 using namespace SIInstrFlags; 3942 const unsigned Opcode = Inst.getOpcode(); 3943 const MCInstrDesc &Desc = MII.get(Opcode); 3944 3945 // lds_direct register is defined so that it can be used 3946 // with 9-bit operands only. Ignore encodings which do not accept these. 
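// Illustrative example (not exhaustive): an accepted use would look roughly
// like "v_mov_b32 v0, lds_direct", while SDWA encodings, *rev opcodes, and any
// use outside the src0 position are diagnosed below.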
3947 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3948 if ((Desc.TSFlags & Enc) == 0) 3949 return None; 3950 3951 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3952 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3953 if (SrcIdx == -1) 3954 break; 3955 const auto &Src = Inst.getOperand(SrcIdx); 3956 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3957 3958 if (isGFX90A()) 3959 return StringRef("lds_direct is not supported on this GPU"); 3960 3961 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3962 return StringRef("lds_direct cannot be used with this instruction"); 3963 3964 if (SrcName != OpName::src0) 3965 return StringRef("lds_direct may be used as src0 only"); 3966 } 3967 } 3968 3969 return None; 3970 } 3971 3972 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3973 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3974 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3975 if (Op.isFlatOffset()) 3976 return Op.getStartLoc(); 3977 } 3978 return getLoc(); 3979 } 3980 3981 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3982 const OperandVector &Operands) { 3983 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3984 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3985 return true; 3986 3987 auto Opcode = Inst.getOpcode(); 3988 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3989 assert(OpNum != -1); 3990 3991 const auto &Op = Inst.getOperand(OpNum); 3992 if (!hasFlatOffsets() && Op.getImm() != 0) { 3993 Error(getFlatOffsetLoc(Operands), 3994 "flat offset modifier is not supported on this GPU"); 3995 return false; 3996 } 3997 3998 // For FLAT segment the offset must be positive; 3999 // MSB is ignored and forced to zero. 4000 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 4001 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 4002 if (!isIntN(OffsetSize, Op.getImm())) { 4003 Error(getFlatOffsetLoc(Operands), 4004 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 4005 return false; 4006 } 4007 } else { 4008 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 4009 if (!isUIntN(OffsetSize, Op.getImm())) { 4010 Error(getFlatOffsetLoc(Operands), 4011 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 4012 return false; 4013 } 4014 } 4015 4016 return true; 4017 } 4018 4019 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 4020 // Start with second operand because SMEM Offset cannot be dst or src0. 
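  // (Operands[0] is the mnemonic token, so the first machine operand is at
  // index 1 and the scan below starts at index 2.)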
4021 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 4022 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4023 if (Op.isSMEMOffset()) 4024 return Op.getStartLoc(); 4025 } 4026 return getLoc(); 4027 } 4028 4029 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 4030 const OperandVector &Operands) { 4031 if (isCI() || isSI()) 4032 return true; 4033 4034 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4035 if ((TSFlags & SIInstrFlags::SMRD) == 0) 4036 return true; 4037 4038 auto Opcode = Inst.getOpcode(); 4039 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 4040 if (OpNum == -1) 4041 return true; 4042 4043 const auto &Op = Inst.getOperand(OpNum); 4044 if (!Op.isImm()) 4045 return true; 4046 4047 uint64_t Offset = Op.getImm(); 4048 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 4049 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 4050 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 4051 return true; 4052 4053 Error(getSMEMOffsetLoc(Operands), 4054 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 4055 "expected a 21-bit signed offset"); 4056 4057 return false; 4058 } 4059 4060 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 4061 unsigned Opcode = Inst.getOpcode(); 4062 const MCInstrDesc &Desc = MII.get(Opcode); 4063 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 4064 return true; 4065 4066 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4067 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4068 4069 const int OpIndices[] = { Src0Idx, Src1Idx }; 4070 4071 unsigned NumExprs = 0; 4072 unsigned NumLiterals = 0; 4073 uint32_t LiteralValue; 4074 4075 for (int OpIdx : OpIndices) { 4076 if (OpIdx == -1) break; 4077 4078 const MCOperand &MO = Inst.getOperand(OpIdx); 4079 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 4080 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 4081 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4082 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4083 if (NumLiterals == 0 || LiteralValue != Value) { 4084 LiteralValue = Value; 4085 ++NumLiterals; 4086 } 4087 } else if (MO.isExpr()) { 4088 ++NumExprs; 4089 } 4090 } 4091 } 4092 4093 return NumLiterals + NumExprs <= 1; 4094 } 4095 4096 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 4097 const unsigned Opc = Inst.getOpcode(); 4098 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 4099 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 4100 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4101 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 4102 4103 if (OpSel & ~3) 4104 return false; 4105 } 4106 4107 if (isGFX940() && (MII.get(Opc).TSFlags & SIInstrFlags::IsDOT)) { 4108 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 4109 if (OpSelIdx != -1) { 4110 if (Inst.getOperand(OpSelIdx).getImm() != 0) 4111 return false; 4112 } 4113 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 4114 if (OpSelHiIdx != -1) { 4115 if (Inst.getOperand(OpSelHiIdx).getImm() != -1) 4116 return false; 4117 } 4118 } 4119 4120 return true; 4121 } 4122 4123 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 4124 const OperandVector &Operands) { 4125 const unsigned Opc = Inst.getOpcode(); 4126 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 4127 if (DppCtrlIdx < 0) 4128 return true; 4129 unsigned DppCtrl = 
Inst.getOperand(DppCtrlIdx).getImm(); 4130 4131 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 4132 // DPP64 is supported for row_newbcast only. 4133 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 4134 if (Src0Idx >= 0 && 4135 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 4136 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 4137 Error(S, "64 bit dpp only supports row_newbcast"); 4138 return false; 4139 } 4140 } 4141 4142 return true; 4143 } 4144 4145 // Check if VCC register matches wavefront size 4146 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4147 auto FB = getFeatureBits(); 4148 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4149 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4150 } 4151 4152 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4153 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4154 const OperandVector &Operands) { 4155 unsigned Opcode = Inst.getOpcode(); 4156 const MCInstrDesc &Desc = MII.get(Opcode); 4157 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4158 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4159 ImmIdx == -1) 4160 return true; 4161 4162 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4163 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4164 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4165 4166 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4167 4168 unsigned NumExprs = 0; 4169 unsigned NumLiterals = 0; 4170 uint32_t LiteralValue; 4171 4172 for (int OpIdx : OpIndices) { 4173 if (OpIdx == -1) 4174 continue; 4175 4176 const MCOperand &MO = Inst.getOperand(OpIdx); 4177 if (!MO.isImm() && !MO.isExpr()) 4178 continue; 4179 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4180 continue; 4181 4182 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4183 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4184 Error(getConstLoc(Operands), 4185 "inline constants are not allowed for this operand"); 4186 return false; 4187 } 4188 4189 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4190 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4191 if (NumLiterals == 0 || LiteralValue != Value) { 4192 LiteralValue = Value; 4193 ++NumLiterals; 4194 } 4195 } else if (MO.isExpr()) { 4196 ++NumExprs; 4197 } 4198 } 4199 NumLiterals += NumExprs; 4200 4201 if (!NumLiterals) 4202 return true; 4203 4204 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4205 Error(getLitLoc(Operands), "literal operands are not supported"); 4206 return false; 4207 } 4208 4209 if (NumLiterals > 1) { 4210 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4211 return false; 4212 } 4213 4214 return true; 4215 } 4216 4217 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4218 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4219 const MCRegisterInfo *MRI) { 4220 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4221 if (OpIdx < 0) 4222 return -1; 4223 4224 const MCOperand &Op = Inst.getOperand(OpIdx); 4225 if (!Op.isReg()) 4226 return -1; 4227 4228 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4229 auto Reg = Sub ? Sub : Op.getReg(); 4230 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4231 return AGPR32.contains(Reg) ? 
1 : 0; 4232 } 4233 4234 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4235 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4236 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4237 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4238 SIInstrFlags::DS)) == 0) 4239 return true; 4240 4241 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4242 : AMDGPU::OpName::vdata; 4243 4244 const MCRegisterInfo *MRI = getMRI(); 4245 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4246 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4247 4248 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4249 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4250 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4251 return false; 4252 } 4253 4254 auto FB = getFeatureBits(); 4255 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4256 if (DataAreg < 0 || DstAreg < 0) 4257 return true; 4258 return DstAreg == DataAreg; 4259 } 4260 4261 return DstAreg < 1 && DataAreg < 1; 4262 } 4263 4264 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4265 auto FB = getFeatureBits(); 4266 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4267 return true; 4268 4269 const MCRegisterInfo *MRI = getMRI(); 4270 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4271 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4272 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4273 const MCOperand &Op = Inst.getOperand(I); 4274 if (!Op.isReg()) 4275 continue; 4276 4277 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4278 if (!Sub) 4279 continue; 4280 4281 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4282 return false; 4283 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4284 return false; 4285 } 4286 4287 return true; 4288 } 4289 4290 SMLoc AMDGPUAsmParser::getBLGPLoc(const OperandVector &Operands) const { 4291 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4292 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4293 if (Op.isBLGP()) 4294 return Op.getStartLoc(); 4295 } 4296 return SMLoc(); 4297 } 4298 4299 bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst, 4300 const OperandVector &Operands) { 4301 unsigned Opc = Inst.getOpcode(); 4302 int BlgpIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::blgp); 4303 if (BlgpIdx == -1) 4304 return true; 4305 SMLoc BLGPLoc = getBLGPLoc(Operands); 4306 if (!BLGPLoc.isValid()) 4307 return true; 4308 bool IsNeg = StringRef(BLGPLoc.getPointer()).startswith("neg:"); 4309 auto FB = getFeatureBits(); 4310 bool UsesNeg = false; 4311 if (FB[AMDGPU::FeatureGFX940Insts]) { 4312 switch (Opc) { 4313 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd: 4314 case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd: 4315 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd: 4316 case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd: 4317 UsesNeg = true; 4318 } 4319 } 4320 4321 if (IsNeg == UsesNeg) 4322 return true; 4323 4324 Error(BLGPLoc, 4325 UsesNeg ? "invalid modifier: blgp is not supported" 4326 : "invalid modifier: neg is not supported"); 4327 4328 return false; 4329 } 4330 4331 // gfx90a has an undocumented limitation: 4332 // DS_GWS opcodes must use even aligned registers. 
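// For example (illustrative), a data operand in v2 passes this check, while
// one in v3 is diagnosed with "vgpr must be even aligned".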
4333 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4334 const OperandVector &Operands) { 4335 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4336 return true; 4337 4338 int Opc = Inst.getOpcode(); 4339 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4340 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4341 return true; 4342 4343 const MCRegisterInfo *MRI = getMRI(); 4344 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4345 int Data0Pos = 4346 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4347 assert(Data0Pos != -1); 4348 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4349 auto RegIdx = Reg - (VGPR32.contains(Reg) ? AMDGPU::VGPR0 : AMDGPU::AGPR0); 4350 if (RegIdx & 1) { 4351 SMLoc RegLoc = getRegLoc(Reg, Operands); 4352 Error(RegLoc, "vgpr must be even aligned"); 4353 return false; 4354 } 4355 4356 return true; 4357 } 4358 4359 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4360 const OperandVector &Operands, 4361 const SMLoc &IDLoc) { 4362 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4363 AMDGPU::OpName::cpol); 4364 if (CPolPos == -1) 4365 return true; 4366 4367 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4368 4369 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4370 if ((TSFlags & (SIInstrFlags::SMRD)) && 4371 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4372 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4373 return false; 4374 } 4375 4376 if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) { 4377 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4378 StringRef CStr(S.getPointer()); 4379 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4380 Error(S, "scc is not supported on this GPU"); 4381 return false; 4382 } 4383 4384 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4385 return true; 4386 4387 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4388 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4389 Error(IDLoc, isGFX940() ? "instruction must use sc0" 4390 : "instruction must use glc"); 4391 return false; 4392 } 4393 } else { 4394 if (CPol & CPol::GLC) { 4395 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4396 StringRef CStr(S.getPointer()); 4397 S = SMLoc::getFromPointer( 4398 &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]); 4399 Error(S, isGFX940() ? 
"instruction must not use sc0" 4400 : "instruction must not use glc"); 4401 return false; 4402 } 4403 } 4404 4405 return true; 4406 } 4407 4408 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4409 const SMLoc &IDLoc, 4410 const OperandVector &Operands) { 4411 if (auto ErrMsg = validateLdsDirect(Inst)) { 4412 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4413 return false; 4414 } 4415 if (!validateSOPLiteral(Inst)) { 4416 Error(getLitLoc(Operands), 4417 "only one literal operand is allowed"); 4418 return false; 4419 } 4420 if (!validateVOPLiteral(Inst, Operands)) { 4421 return false; 4422 } 4423 if (!validateConstantBusLimitations(Inst, Operands)) { 4424 return false; 4425 } 4426 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4427 return false; 4428 } 4429 if (!validateIntClampSupported(Inst)) { 4430 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4431 "integer clamping is not supported on this GPU"); 4432 return false; 4433 } 4434 if (!validateOpSel(Inst)) { 4435 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4436 "invalid op_sel operand"); 4437 return false; 4438 } 4439 if (!validateDPP(Inst, Operands)) { 4440 return false; 4441 } 4442 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4443 if (!validateMIMGD16(Inst)) { 4444 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4445 "d16 modifier is not supported on this GPU"); 4446 return false; 4447 } 4448 if (!validateMIMGDim(Inst)) { 4449 Error(IDLoc, "dim modifier is required on this GPU"); 4450 return false; 4451 } 4452 if (!validateMIMGMSAA(Inst)) { 4453 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4454 "invalid dim; must be MSAA type"); 4455 return false; 4456 } 4457 if (auto ErrMsg = validateMIMGDataSize(Inst)) { 4458 Error(IDLoc, *ErrMsg); 4459 return false; 4460 } 4461 if (!validateMIMGAddrSize(Inst)) { 4462 Error(IDLoc, 4463 "image address size does not match dim and a16"); 4464 return false; 4465 } 4466 if (!validateMIMGAtomicDMask(Inst)) { 4467 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4468 "invalid atomic image dmask"); 4469 return false; 4470 } 4471 if (!validateMIMGGatherDMask(Inst)) { 4472 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4473 "invalid image_gather dmask: only one bit must be set"); 4474 return false; 4475 } 4476 if (!validateMovrels(Inst, Operands)) { 4477 return false; 4478 } 4479 if (!validateFlatOffset(Inst, Operands)) { 4480 return false; 4481 } 4482 if (!validateSMEMOffset(Inst, Operands)) { 4483 return false; 4484 } 4485 if (!validateMAIAccWrite(Inst, Operands)) { 4486 return false; 4487 } 4488 if (!validateMFMA(Inst, Operands)) { 4489 return false; 4490 } 4491 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4492 return false; 4493 } 4494 4495 if (!validateAGPRLdSt(Inst)) { 4496 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4497 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4498 : "invalid register class: agpr loads and stores not supported on this GPU" 4499 ); 4500 return false; 4501 } 4502 if (!validateVGPRAlign(Inst)) { 4503 Error(IDLoc, 4504 "invalid register class: vgpr tuples must be 64 bit aligned"); 4505 return false; 4506 } 4507 if (!validateGWS(Inst, Operands)) { 4508 return false; 4509 } 4510 4511 if (!validateBLGP(Inst, Operands)) { 4512 return false; 4513 } 4514 4515 if (!validateDivScale(Inst)) { 4516 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4517 return false; 4518 } 4519 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4520 return false; 4521 } 4522 4523 return true; 4524 } 4525 4526 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4527 const FeatureBitset &FBS, 4528 unsigned VariantID = 0); 4529 4530 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4531 const FeatureBitset &AvailableFeatures, 4532 unsigned VariantID); 4533 4534 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4535 const FeatureBitset &FBS) { 4536 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4537 } 4538 4539 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4540 const FeatureBitset &FBS, 4541 ArrayRef<unsigned> Variants) { 4542 for (auto Variant : Variants) { 4543 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4544 return true; 4545 } 4546 4547 return false; 4548 } 4549 4550 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4551 const SMLoc &IDLoc) { 4552 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4553 4554 // Check if requested instruction variant is supported. 4555 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4556 return false; 4557 4558 // This instruction is not supported. 4559 // Clear any other pending errors because they are no longer relevant. 4560 getParser().clearPendingErrors(); 4561 4562 // Requested instruction variant is not supported. 4563 // Check if any other variants are supported. 4564 StringRef VariantName = getMatchedVariantName(); 4565 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4566 return Error(IDLoc, 4567 Twine(VariantName, 4568 " variant of this instruction is not supported")); 4569 } 4570 4571 // Finally check if this instruction is supported on any other GPU. 4572 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4573 return Error(IDLoc, "instruction not supported on this GPU"); 4574 } 4575 4576 // Instruction not supported on any GPU. Probably a typo. 4577 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4578 return Error(IDLoc, "invalid instruction" + Suggestion); 4579 } 4580 4581 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4582 OperandVector &Operands, 4583 MCStreamer &Out, 4584 uint64_t &ErrorInfo, 4585 bool MatchingInlineAsm) { 4586 MCInst Inst; 4587 unsigned Result = Match_Success; 4588 for (auto Variant : getMatchedVariants()) { 4589 uint64_t EI; 4590 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4591 Variant); 4592 // We order match statuses from least to most specific. 
We use most specific 4593 // status as resulting 4594 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4595 if ((R == Match_Success) || 4596 (R == Match_PreferE32) || 4597 (R == Match_MissingFeature && Result != Match_PreferE32) || 4598 (R == Match_InvalidOperand && Result != Match_MissingFeature 4599 && Result != Match_PreferE32) || 4600 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4601 && Result != Match_MissingFeature 4602 && Result != Match_PreferE32)) { 4603 Result = R; 4604 ErrorInfo = EI; 4605 } 4606 if (R == Match_Success) 4607 break; 4608 } 4609 4610 if (Result == Match_Success) { 4611 if (!validateInstruction(Inst, IDLoc, Operands)) { 4612 return true; 4613 } 4614 Inst.setLoc(IDLoc); 4615 Out.emitInstruction(Inst, getSTI()); 4616 return false; 4617 } 4618 4619 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4620 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4621 return true; 4622 } 4623 4624 switch (Result) { 4625 default: break; 4626 case Match_MissingFeature: 4627 // It has been verified that the specified instruction 4628 // mnemonic is valid. A match was found but it requires 4629 // features which are not supported on this GPU. 4630 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4631 4632 case Match_InvalidOperand: { 4633 SMLoc ErrorLoc = IDLoc; 4634 if (ErrorInfo != ~0ULL) { 4635 if (ErrorInfo >= Operands.size()) { 4636 return Error(IDLoc, "too few operands for instruction"); 4637 } 4638 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4639 if (ErrorLoc == SMLoc()) 4640 ErrorLoc = IDLoc; 4641 } 4642 return Error(ErrorLoc, "invalid operand for instruction"); 4643 } 4644 4645 case Match_PreferE32: 4646 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4647 "should be encoded as e32"); 4648 case Match_MnemonicFail: 4649 llvm_unreachable("Invalid instructions should have been handled already"); 4650 } 4651 llvm_unreachable("Implement any new match types added!"); 4652 } 4653 4654 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4655 int64_t Tmp = -1; 4656 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4657 return true; 4658 } 4659 if (getParser().parseAbsoluteExpression(Tmp)) { 4660 return true; 4661 } 4662 Ret = static_cast<uint32_t>(Tmp); 4663 return false; 4664 } 4665 4666 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4667 uint32_t &Minor) { 4668 if (ParseAsAbsoluteExpression(Major)) 4669 return TokError("invalid major version"); 4670 4671 if (!trySkipToken(AsmToken::Comma)) 4672 return TokError("minor version number required, comma expected"); 4673 4674 if (ParseAsAbsoluteExpression(Minor)) 4675 return TokError("invalid minor version"); 4676 4677 return false; 4678 } 4679 4680 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4681 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4682 return TokError("directive only supported for amdgcn architecture"); 4683 4684 std::string TargetIDDirective; 4685 SMLoc TargetStart = getTok().getLoc(); 4686 if (getParser().parseEscapedString(TargetIDDirective)) 4687 return true; 4688 4689 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4690 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4691 return getParser().Error(TargetRange.Start, 4692 (Twine(".amdgcn_target directive's target id ") + 4693 Twine(TargetIDDirective) + 4694 Twine(" does not match the specified target id ") + 4695 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4696 4697 return false; 4698 } 4699 4700 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4701 return Error(Range.Start, "value out of range", Range); 4702 } 4703 4704 bool AMDGPUAsmParser::calculateGPRBlocks( 4705 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4706 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4707 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4708 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4709 // TODO(scott.linder): These calculations are duplicated from 4710 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4711 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4712 4713 unsigned NumVGPRs = NextFreeVGPR; 4714 unsigned NumSGPRs = NextFreeSGPR; 4715 4716 if (Version.Major >= 10) 4717 NumSGPRs = 0; 4718 else { 4719 unsigned MaxAddressableNumSGPRs = 4720 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4721 4722 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4723 NumSGPRs > MaxAddressableNumSGPRs) 4724 return OutOfRangeError(SGPRRange); 4725 4726 NumSGPRs += 4727 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4728 4729 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4730 NumSGPRs > MaxAddressableNumSGPRs) 4731 return OutOfRangeError(SGPRRange); 4732 4733 if (Features.test(FeatureSGPRInitBug)) 4734 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4735 } 4736 4737 VGPRBlocks = 4738 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4739 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4740 4741 return false; 4742 } 4743 4744 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4745 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4746 return TokError("directive only supported for amdgcn architecture"); 4747 4748 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4749 return TokError("directive only supported for amdhsa OS"); 4750 4751 StringRef KernelName; 4752 if (getParser().parseIdentifier(KernelName)) 4753 return true; 4754 4755 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4756 4757 StringSet<> Seen; 4758 4759 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4760 4761 SMRange VGPRRange; 4762 uint64_t NextFreeVGPR = 0; 4763 uint64_t AccumOffset = 0; 4764 uint64_t SharedVGPRCount = 0; 4765 SMRange SGPRRange; 4766 uint64_t NextFreeSGPR = 0; 4767 4768 // Count the number of user SGPRs implied from the enabled feature bits. 4769 unsigned ImpliedUserSGPRCount = 0; 4770 4771 // Track if the asm explicitly contains the directive for the user SGPR 4772 // count. 
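  // (.amdhsa_user_sgpr_count). When given, it must not be smaller than the
  // count implied by the enabled user SGPR directives; this is checked after
  // the parsing loop.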
4773 Optional<unsigned> ExplicitUserSGPRCount; 4774 bool ReserveVCC = true; 4775 bool ReserveFlatScr = true; 4776 Optional<bool> EnableWavefrontSize32; 4777 4778 while (true) { 4779 while (trySkipToken(AsmToken::EndOfStatement)); 4780 4781 StringRef ID; 4782 SMRange IDRange = getTok().getLocRange(); 4783 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4784 return true; 4785 4786 if (ID == ".end_amdhsa_kernel") 4787 break; 4788 4789 if (Seen.find(ID) != Seen.end()) 4790 return TokError(".amdhsa_ directives cannot be repeated"); 4791 Seen.insert(ID); 4792 4793 SMLoc ValStart = getLoc(); 4794 int64_t IVal; 4795 if (getParser().parseAbsoluteExpression(IVal)) 4796 return true; 4797 SMLoc ValEnd = getLoc(); 4798 SMRange ValRange = SMRange(ValStart, ValEnd); 4799 4800 if (IVal < 0) 4801 return OutOfRangeError(ValRange); 4802 4803 uint64_t Val = IVal; 4804 4805 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4806 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4807 return OutOfRangeError(RANGE); \ 4808 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4809 4810 if (ID == ".amdhsa_group_segment_fixed_size") { 4811 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4812 return OutOfRangeError(ValRange); 4813 KD.group_segment_fixed_size = Val; 4814 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4815 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4816 return OutOfRangeError(ValRange); 4817 KD.private_segment_fixed_size = Val; 4818 } else if (ID == ".amdhsa_kernarg_size") { 4819 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4820 return OutOfRangeError(ValRange); 4821 KD.kernarg_size = Val; 4822 } else if (ID == ".amdhsa_user_sgpr_count") { 4823 ExplicitUserSGPRCount = Val; 4824 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4825 if (hasArchitectedFlatScratch()) 4826 return Error(IDRange.Start, 4827 "directive is not supported with architected flat scratch", 4828 IDRange); 4829 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4830 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4831 Val, ValRange); 4832 if (Val) 4833 ImpliedUserSGPRCount += 4; 4834 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4835 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4836 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4837 ValRange); 4838 if (Val) 4839 ImpliedUserSGPRCount += 2; 4840 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4841 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4842 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4843 ValRange); 4844 if (Val) 4845 ImpliedUserSGPRCount += 2; 4846 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4847 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4848 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4849 Val, ValRange); 4850 if (Val) 4851 ImpliedUserSGPRCount += 2; 4852 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4853 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4854 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4855 ValRange); 4856 if (Val) 4857 ImpliedUserSGPRCount += 2; 4858 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4859 if (hasArchitectedFlatScratch()) 4860 return Error(IDRange.Start, 4861 "directive is not supported with architected flat scratch", 4862 IDRange); 4863 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4864 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4865 ValRange); 4866 if (Val) 4867 ImpliedUserSGPRCount += 2; 4868 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4869 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4870 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4871 Val, ValRange); 4872 if (Val) 4873 ImpliedUserSGPRCount += 1; 4874 } else if (ID == ".amdhsa_wavefront_size32") { 4875 if (IVersion.Major < 10) 4876 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4877 EnableWavefrontSize32 = Val; 4878 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4879 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4880 Val, ValRange); 4881 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4882 if (hasArchitectedFlatScratch()) 4883 return Error(IDRange.Start, 4884 "directive is not supported with architected flat scratch", 4885 IDRange); 4886 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4887 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4888 } else if (ID == ".amdhsa_enable_private_segment") { 4889 if (!hasArchitectedFlatScratch()) 4890 return Error( 4891 IDRange.Start, 4892 "directive is not supported without architected flat scratch", 4893 IDRange); 4894 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4895 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4896 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4897 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4898 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4899 ValRange); 4900 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4901 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4902 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4903 ValRange); 4904 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4905 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4906 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4907 ValRange); 4908 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4909 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4910 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4911 ValRange); 4912 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4913 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4914 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4915 ValRange); 4916 } else if (ID == ".amdhsa_next_free_vgpr") { 4917 VGPRRange = ValRange; 4918 NextFreeVGPR = Val; 4919 } else if (ID == ".amdhsa_next_free_sgpr") { 4920 SGPRRange = ValRange; 4921 NextFreeSGPR = Val; 4922 } else if (ID == ".amdhsa_accum_offset") { 4923 if (!isGFX90A()) 4924 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4925 AccumOffset = Val; 4926 } else if (ID == ".amdhsa_reserve_vcc") { 4927 if (!isUInt<1>(Val)) 4928 return OutOfRangeError(ValRange); 4929 ReserveVCC = Val; 4930 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4931 if (IVersion.Major < 7) 4932 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4933 if (hasArchitectedFlatScratch()) 4934 return Error(IDRange.Start, 4935 "directive is not supported with architected flat scratch", 4936 IDRange); 4937 if (!isUInt<1>(Val)) 4938 return OutOfRangeError(ValRange); 4939 ReserveFlatScr = Val; 4940 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4941 if (IVersion.Major < 8) 4942 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4943 if (!isUInt<1>(Val)) 4944 return OutOfRangeError(ValRange); 4945 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4946 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4947 IDRange); 4948 } else if (ID == ".amdhsa_float_round_mode_32") { 4949 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4950 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4951 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4952 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4953 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4954 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4955 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4956 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4957 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4958 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4959 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4960 ValRange); 4961 } else if (ID == ".amdhsa_dx10_clamp") { 4962 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4963 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4964 } else if (ID == ".amdhsa_ieee_mode") { 4965 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4966 Val, ValRange); 4967 } else if (ID == ".amdhsa_fp16_overflow") { 4968 if (IVersion.Major < 9) 4969 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4970 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4971 ValRange); 4972 } else if (ID == ".amdhsa_tg_split") { 4973 if (!isGFX90A()) 4974 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4975 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4976 ValRange); 4977 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4978 if (IVersion.Major < 10) 4979 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4980 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4981 ValRange); 4982 } else if (ID == ".amdhsa_memory_ordered") { 4983 if (IVersion.Major < 10) 4984 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4985 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4986 ValRange); 4987 } else if (ID == ".amdhsa_forward_progress") { 4988 if (IVersion.Major < 10) 4989 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4990 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4991 ValRange); 4992 } else if (ID == ".amdhsa_shared_vgpr_count") { 4993 if (IVersion.Major < 10) 4994 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4995 SharedVGPRCount = Val; 4996 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, 4997 COMPUTE_PGM_RSRC3_GFX10_SHARED_VGPR_COUNT, Val, 4998 ValRange); 4999 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 5000 PARSE_BITS_ENTRY( 5001 KD.compute_pgm_rsrc2, 5002 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 5003 ValRange); 5004 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 5005 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5006 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 5007 Val, ValRange); 5008 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 5009 PARSE_BITS_ENTRY( 5010 KD.compute_pgm_rsrc2, 5011 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 5012 ValRange); 5013 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 5014 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5015 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 5016 Val, ValRange); 5017 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 5018 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5019 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 5020 Val, ValRange); 5021 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 5022 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5023 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 5024 Val, ValRange); 5025 } else if (ID == ".amdhsa_exception_int_div_zero") { 5026 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 5027 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 5028 Val, ValRange); 5029 } else { 5030 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 5031 } 5032 5033 #undef PARSE_BITS_ENTRY 5034 } 5035 5036 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 5037 return TokError(".amdhsa_next_free_vgpr directive is required"); 5038 5039 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 5040 return TokError(".amdhsa_next_free_sgpr directive is required"); 5041 5042 unsigned VGPRBlocks; 5043 unsigned SGPRBlocks; 5044 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 5045 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 5046 EnableWavefrontSize32, NextFreeVGPR, 5047 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 5048 SGPRBlocks)) 5049 return true; 5050 5051 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 5052 VGPRBlocks)) 5053 return OutOfRangeError(VGPRRange); 5054 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5055 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 5056 5057 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 5058 SGPRBlocks)) 5059 return OutOfRangeError(SGPRRange); 5060 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 5061 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 5062 SGPRBlocks); 5063 5064 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 5065 return TokError(".amdhsa_user_sgpr_count smaller than implied by " 5066 "enabled user SGPRs"); 5067 5068 unsigned UserSGPRCount = 5069 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 5070 5071 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 5072 return TokError("too many user SGPRs enabled"); 5073 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 5074 UserSGPRCount); 5075 5076 if (isGFX90A()) { 5077 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 5078 return TokError(".amdhsa_accum_offset directive is required"); 5079 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 5080 return TokError("accum_offset should be in range [4..256] in " 5081 "increments of 4"); 5082 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 5083 return TokError("accum_offset exceeds total VGPR allocation"); 5084 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 5085 (AccumOffset / 4 - 1)); 5086 } 5087 5088 if (IVersion.Major == 10) { 5089 // SharedVGPRCount < 16 checked by PARSE_BITS_ENTRY 5090 if (SharedVGPRCount && EnableWavefrontSize32) { 5091 return TokError("shared_vgpr_count directive not valid on " 5092 "wavefront size 32"); 5093 } 5094 if (SharedVGPRCount * 2 + VGPRBlocks > 63) { 5095 return TokError("shared_vgpr_count*2 + " 5096 "compute_pgm_rsrc1.GRANULATED_WORKITEM_VGPR_COUNT cannot " 5097 "exceed 63"); 5098 } 5099 } 5100 5101 getTargetStreamer().EmitAmdhsaKernelDescriptor( 5102 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 5103 ReserveFlatScr); 5104 return false; 5105 } 5106 5107 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 5108 uint32_t Major; 5109 uint32_t Minor; 5110 5111 if (ParseDirectiveMajorMinor(Major, Minor)) 5112 return true; 5113 5114 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 5115 return false; 5116 } 5117 5118 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 5119 uint32_t Major; 5120 uint32_t Minor; 5121 uint32_t Stepping; 5122 StringRef VendorName; 5123 StringRef ArchName; 5124 5125 // If this directive has no
arguments, then use the ISA version for the 5126 // targeted GPU. 5127 if (isToken(AsmToken::EndOfStatement)) { 5128 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5129 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 5130 ISA.Stepping, 5131 "AMD", "AMDGPU"); 5132 return false; 5133 } 5134 5135 if (ParseDirectiveMajorMinor(Major, Minor)) 5136 return true; 5137 5138 if (!trySkipToken(AsmToken::Comma)) 5139 return TokError("stepping version number required, comma expected"); 5140 5141 if (ParseAsAbsoluteExpression(Stepping)) 5142 return TokError("invalid stepping version"); 5143 5144 if (!trySkipToken(AsmToken::Comma)) 5145 return TokError("vendor name required, comma expected"); 5146 5147 if (!parseString(VendorName, "invalid vendor name")) 5148 return true; 5149 5150 if (!trySkipToken(AsmToken::Comma)) 5151 return TokError("arch name required, comma expected"); 5152 5153 if (!parseString(ArchName, "invalid arch name")) 5154 return true; 5155 5156 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 5157 VendorName, ArchName); 5158 return false; 5159 } 5160 5161 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 5162 amd_kernel_code_t &Header) { 5163 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 5164 // assembly for backwards compatibility. 5165 if (ID == "max_scratch_backing_memory_byte_size") { 5166 Parser.eatToEndOfStatement(); 5167 return false; 5168 } 5169 5170 SmallString<40> ErrStr; 5171 raw_svector_ostream Err(ErrStr); 5172 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 5173 return TokError(Err.str()); 5174 } 5175 Lex(); 5176 5177 if (ID == "enable_wavefront_size32") { 5178 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 5179 if (!isGFX10Plus()) 5180 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 5181 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5182 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 5183 } else { 5184 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5185 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 5186 } 5187 } 5188 5189 if (ID == "wavefront_size") { 5190 if (Header.wavefront_size == 5) { 5191 if (!isGFX10Plus()) 5192 return TokError("wavefront_size=5 is only allowed on GFX10+"); 5193 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 5194 return TokError("wavefront_size=5 requires +WavefrontSize32"); 5195 } else if (Header.wavefront_size == 6) { 5196 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 5197 return TokError("wavefront_size=6 requires +WavefrontSize64"); 5198 } 5199 } 5200 5201 if (ID == "enable_wgp_mode") { 5202 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 5203 !isGFX10Plus()) 5204 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 5205 } 5206 5207 if (ID == "enable_mem_ordered") { 5208 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 5209 !isGFX10Plus()) 5210 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 5211 } 5212 5213 if (ID == "enable_fwd_progress") { 5214 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 5215 !isGFX10Plus()) 5216 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5217 } 5218 5219 return false; 5220 } 5221 5222 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5223 amd_kernel_code_t Header; 5224 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5225 5226 while (true) { 
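    // Each iteration consumes one field record, typically of the form
    // "<field> = <value>" (for example, "wavefront_size = 6"), until the
    // .end_amd_kernel_code_t terminator is reached.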
5227 // Lex EndOfStatement. This is in a while loop, because lexing a comment 5228 // will set the current token to EndOfStatement. 5229 while(trySkipToken(AsmToken::EndOfStatement)); 5230 5231 StringRef ID; 5232 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5233 return true; 5234 5235 if (ID == ".end_amd_kernel_code_t") 5236 break; 5237 5238 if (ParseAMDKernelCodeTValue(ID, Header)) 5239 return true; 5240 } 5241 5242 getTargetStreamer().EmitAMDKernelCodeT(Header); 5243 5244 return false; 5245 } 5246 5247 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5248 StringRef KernelName; 5249 if (!parseId(KernelName, "expected symbol name")) 5250 return true; 5251 5252 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5253 ELF::STT_AMDGPU_HSA_KERNEL); 5254 5255 KernelScope.initialize(getContext()); 5256 return false; 5257 } 5258 5259 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5260 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5261 return Error(getLoc(), 5262 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5263 "architectures"); 5264 } 5265 5266 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5267 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5268 return Error(getParser().getTok().getLoc(), "target id must match options"); 5269 5270 getTargetStreamer().EmitISAVersion(); 5271 Lex(); 5272 5273 return false; 5274 } 5275 5276 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5277 const char *AssemblerDirectiveBegin; 5278 const char *AssemblerDirectiveEnd; 5279 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5280 isHsaAbiVersion3AndAbove(&getSTI()) 5281 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5282 HSAMD::V3::AssemblerDirectiveEnd) 5283 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5284 HSAMD::AssemblerDirectiveEnd); 5285 5286 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5287 return Error(getLoc(), 5288 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5289 "not available on non-amdhsa OSes")).str()); 5290 } 5291 5292 std::string HSAMetadataString; 5293 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5294 HSAMetadataString)) 5295 return true; 5296 5297 if (isHsaAbiVersion3AndAbove(&getSTI())) { 5298 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5299 return Error(getLoc(), "invalid HSA metadata"); 5300 } else { 5301 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5302 return Error(getLoc(), "invalid HSA metadata"); 5303 } 5304 5305 return false; 5306 } 5307 5308 /// Common code to parse out a block of text (typically YAML) between start and 5309 /// end directives. 
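/// For example, ParseDirectiveHSAMetadata() above uses this to gather the
/// text between its begin/end metadata directives, and the PAL metadata
/// parsers below do the same; everything up to (but not including) the end
/// directive is accumulated into CollectString.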
5310 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5311 const char *AssemblerDirectiveEnd, 5312 std::string &CollectString) { 5313 5314 raw_string_ostream CollectStream(CollectString); 5315 5316 getLexer().setSkipSpace(false); 5317 5318 bool FoundEnd = false; 5319 while (!isToken(AsmToken::Eof)) { 5320 while (isToken(AsmToken::Space)) { 5321 CollectStream << getTokenStr(); 5322 Lex(); 5323 } 5324 5325 if (trySkipId(AssemblerDirectiveEnd)) { 5326 FoundEnd = true; 5327 break; 5328 } 5329 5330 CollectStream << Parser.parseStringToEndOfStatement() 5331 << getContext().getAsmInfo()->getSeparatorString(); 5332 5333 Parser.eatToEndOfStatement(); 5334 } 5335 5336 getLexer().setSkipSpace(true); 5337 5338 if (isToken(AsmToken::Eof) && !FoundEnd) { 5339 return TokError(Twine("expected directive ") + 5340 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5341 } 5342 5343 CollectStream.flush(); 5344 return false; 5345 } 5346 5347 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5348 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5349 std::string String; 5350 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5351 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5352 return true; 5353 5354 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5355 if (!PALMetadata->setFromString(String)) 5356 return Error(getLoc(), "invalid PAL metadata"); 5357 return false; 5358 } 5359 5360 /// Parse the assembler directive for old linear-format PAL metadata. 5361 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5362 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5363 return Error(getLoc(), 5364 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5365 "not available on non-amdpal OSes")).str()); 5366 } 5367 5368 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5369 PALMetadata->setLegacy(); 5370 for (;;) { 5371 uint32_t Key, Value; 5372 if (ParseAsAbsoluteExpression(Key)) { 5373 return TokError(Twine("invalid value in ") + 5374 Twine(PALMD::AssemblerDirective)); 5375 } 5376 if (!trySkipToken(AsmToken::Comma)) { 5377 return TokError(Twine("expected an even number of values in ") + 5378 Twine(PALMD::AssemblerDirective)); 5379 } 5380 if (ParseAsAbsoluteExpression(Value)) { 5381 return TokError(Twine("invalid value in ") + 5382 Twine(PALMD::AssemblerDirective)); 5383 } 5384 PALMetadata->setRegister(Key, Value); 5385 if (!trySkipToken(AsmToken::Comma)) 5386 break; 5387 } 5388 return false; 5389 } 5390 5391 /// ParseDirectiveAMDGPULDS 5392 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5393 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5394 if (getParser().checkForValidSection()) 5395 return true; 5396 5397 StringRef Name; 5398 SMLoc NameLoc = getLoc(); 5399 if (getParser().parseIdentifier(Name)) 5400 return TokError("expected identifier in directive"); 5401 5402 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5403 if (parseToken(AsmToken::Comma, "expected ','")) 5404 return true; 5405 5406 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5407 5408 int64_t Size; 5409 SMLoc SizeLoc = getLoc(); 5410 if (getParser().parseAbsoluteExpression(Size)) 5411 return true; 5412 if (Size < 0) 5413 return Error(SizeLoc, "size must be non-negative"); 5414 if (Size > LocalMemorySize) 5415 return Error(SizeLoc, "size is too large"); 5416 5417 int64_t Alignment = 4; 5418 if (trySkipToken(AsmToken::Comma)) { 5419 SMLoc AlignLoc = getLoc(); 5420 if 
(getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}

bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  if (isHsaAbiVersion3AndAbove(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  if (MRI.regsOverlap(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, RegNo))
    return isGFX9Plus();

  // GFX10 has 2 more SGPRs 104 and 105.
  if (MRI.regsOverlap(AMDGPU::SGPR104_SGPR105, RegNo))
    return hasSGPR104_SGPR105();

  switch (RegNo) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
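    // For example, an instruction such as "s_mov_b64 flat_scratch, -1" would
    // be expected to be rejected when assembling for those targets (the
    // instruction is illustrative only); the switch below simply reports the
    // FLAT_SCR registers as unavailable.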
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  if (MRI.regsOverlap(AMDGPU::SGPR102_SGPR103, RegNo))
    return hasSGPR102_SGPR103();

  return true;
}

OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

static void applyMnemonicAliases(StringRef &Mnemonic,
                                 const FeatureBitset &Features,
                                 unsigned VariantID);

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);

  // If the target architecture uses MnemonicAlias, call it here to parse
  // operands correctly.
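  // For example, a mnemonic written as "v_add_f32_e64" reaches this point
  // with its "_e64" suffix already stripped by parseMnemonicSuffix() and the
  // 64-bit encoding forced; applyMnemonicAliases() may then rewrite the base
  // name if the target defines an alias for it (the example mnemonic is
  // illustrative).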
5633 applyMnemonicAliases(Name, getAvailableFeatures(), 0); 5634 5635 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5636 5637 bool IsMIMG = Name.startswith("image_"); 5638 5639 while (!trySkipToken(AsmToken::EndOfStatement)) { 5640 OperandMode Mode = OperandMode_Default; 5641 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5642 Mode = OperandMode_NSA; 5643 CPolSeen = 0; 5644 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5645 5646 if (Res != MatchOperand_Success) { 5647 checkUnsupportedInstruction(Name, NameLoc); 5648 if (!Parser.hasPendingError()) { 5649 // FIXME: use real operand location rather than the current location. 5650 StringRef Msg = 5651 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5652 "not a valid operand."; 5653 Error(getLoc(), Msg); 5654 } 5655 while (!trySkipToken(AsmToken::EndOfStatement)) { 5656 lex(); 5657 } 5658 return true; 5659 } 5660 5661 // Eat the comma or space if there is one. 5662 trySkipToken(AsmToken::Comma); 5663 } 5664 5665 return false; 5666 } 5667 5668 //===----------------------------------------------------------------------===// 5669 // Utility functions 5670 //===----------------------------------------------------------------------===// 5671 5672 OperandMatchResultTy 5673 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5674 5675 if (!trySkipId(Prefix, AsmToken::Colon)) 5676 return MatchOperand_NoMatch; 5677 5678 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5679 } 5680 5681 OperandMatchResultTy 5682 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5683 AMDGPUOperand::ImmTy ImmTy, 5684 bool (*ConvertResult)(int64_t&)) { 5685 SMLoc S = getLoc(); 5686 int64_t Value = 0; 5687 5688 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5689 if (Res != MatchOperand_Success) 5690 return Res; 5691 5692 if (ConvertResult && !ConvertResult(Value)) { 5693 Error(S, "invalid " + StringRef(Prefix) + " value."); 5694 } 5695 5696 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5697 return MatchOperand_Success; 5698 } 5699 5700 OperandMatchResultTy 5701 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5702 OperandVector &Operands, 5703 AMDGPUOperand::ImmTy ImmTy, 5704 bool (*ConvertResult)(int64_t&)) { 5705 SMLoc S = getLoc(); 5706 if (!trySkipId(Prefix, AsmToken::Colon)) 5707 return MatchOperand_NoMatch; 5708 5709 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5710 return MatchOperand_ParseFail; 5711 5712 unsigned Val = 0; 5713 const unsigned MaxSize = 4; 5714 5715 // FIXME: How to verify the number of elements matches the number of src 5716 // operands? 
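  // A sketch of the syntax the loop below accepts, assuming a prefix such as
  // op_sel (the prefix name is an assumed example, not taken from this file):
  //
  //   op_sel:[0,1,1]
  //
  // Each element must be 0 or 1, and element I is packed into bit I of Val.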
5717 for (int I = 0; ; ++I) { 5718 int64_t Op; 5719 SMLoc Loc = getLoc(); 5720 if (!parseExpr(Op)) 5721 return MatchOperand_ParseFail; 5722 5723 if (Op != 0 && Op != 1) { 5724 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5725 return MatchOperand_ParseFail; 5726 } 5727 5728 Val |= (Op << I); 5729 5730 if (trySkipToken(AsmToken::RBrac)) 5731 break; 5732 5733 if (I + 1 == MaxSize) { 5734 Error(getLoc(), "expected a closing square bracket"); 5735 return MatchOperand_ParseFail; 5736 } 5737 5738 if (!skipToken(AsmToken::Comma, "expected a comma")) 5739 return MatchOperand_ParseFail; 5740 } 5741 5742 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5743 return MatchOperand_Success; 5744 } 5745 5746 OperandMatchResultTy 5747 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5748 AMDGPUOperand::ImmTy ImmTy) { 5749 int64_t Bit; 5750 SMLoc S = getLoc(); 5751 5752 if (trySkipId(Name)) { 5753 Bit = 1; 5754 } else if (trySkipId("no", Name)) { 5755 Bit = 0; 5756 } else { 5757 return MatchOperand_NoMatch; 5758 } 5759 5760 if (Name == "r128" && !hasMIMG_R128()) { 5761 Error(S, "r128 modifier is not supported on this GPU"); 5762 return MatchOperand_ParseFail; 5763 } 5764 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5765 Error(S, "a16 modifier is not supported on this GPU"); 5766 return MatchOperand_ParseFail; 5767 } 5768 5769 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5770 ImmTy = AMDGPUOperand::ImmTyR128A16; 5771 5772 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5773 return MatchOperand_Success; 5774 } 5775 5776 OperandMatchResultTy 5777 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5778 unsigned CPolOn = 0; 5779 unsigned CPolOff = 0; 5780 SMLoc S = getLoc(); 5781 5782 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 5783 if (isGFX940() && !Mnemo.startswith("s_")) { 5784 if (trySkipId("sc0")) 5785 CPolOn = AMDGPU::CPol::SC0; 5786 else if (trySkipId("nosc0")) 5787 CPolOff = AMDGPU::CPol::SC0; 5788 else if (trySkipId("nt")) 5789 CPolOn = AMDGPU::CPol::NT; 5790 else if (trySkipId("nont")) 5791 CPolOff = AMDGPU::CPol::NT; 5792 else if (trySkipId("sc1")) 5793 CPolOn = AMDGPU::CPol::SC1; 5794 else if (trySkipId("nosc1")) 5795 CPolOff = AMDGPU::CPol::SC1; 5796 else 5797 return MatchOperand_NoMatch; 5798 } 5799 else if (trySkipId("glc")) 5800 CPolOn = AMDGPU::CPol::GLC; 5801 else if (trySkipId("noglc")) 5802 CPolOff = AMDGPU::CPol::GLC; 5803 else if (trySkipId("slc")) 5804 CPolOn = AMDGPU::CPol::SLC; 5805 else if (trySkipId("noslc")) 5806 CPolOff = AMDGPU::CPol::SLC; 5807 else if (trySkipId("dlc")) 5808 CPolOn = AMDGPU::CPol::DLC; 5809 else if (trySkipId("nodlc")) 5810 CPolOff = AMDGPU::CPol::DLC; 5811 else if (trySkipId("scc")) 5812 CPolOn = AMDGPU::CPol::SCC; 5813 else if (trySkipId("noscc")) 5814 CPolOff = AMDGPU::CPol::SCC; 5815 else 5816 return MatchOperand_NoMatch; 5817 5818 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5819 Error(S, "dlc modifier is not supported on this GPU"); 5820 return MatchOperand_ParseFail; 5821 } 5822 5823 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5824 Error(S, "scc modifier is not supported on this GPU"); 5825 return MatchOperand_ParseFail; 5826 } 5827 5828 if (CPolSeen & (CPolOn | CPolOff)) { 5829 Error(S, "duplicate cache policy modifier"); 5830 return MatchOperand_ParseFail; 5831 } 5832 5833 CPolSeen |= (CPolOn | CPolOff); 5834 5835 for (unsigned I = 1; I != Operands.size(); ++I) { 5836 AMDGPUOperand &Op = ((AMDGPUOperand 
&)*Operands[I]); 5837 if (Op.isCPol()) { 5838 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5839 return MatchOperand_Success; 5840 } 5841 } 5842 5843 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5844 AMDGPUOperand::ImmTyCPol)); 5845 5846 return MatchOperand_Success; 5847 } 5848 5849 static void addOptionalImmOperand( 5850 MCInst& Inst, const OperandVector& Operands, 5851 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5852 AMDGPUOperand::ImmTy ImmT, 5853 int64_t Default = 0) { 5854 auto i = OptionalIdx.find(ImmT); 5855 if (i != OptionalIdx.end()) { 5856 unsigned Idx = i->second; 5857 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5858 } else { 5859 Inst.addOperand(MCOperand::createImm(Default)); 5860 } 5861 } 5862 5863 OperandMatchResultTy 5864 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5865 StringRef &Value, 5866 SMLoc &StringLoc) { 5867 if (!trySkipId(Prefix, AsmToken::Colon)) 5868 return MatchOperand_NoMatch; 5869 5870 StringLoc = getLoc(); 5871 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5872 : MatchOperand_ParseFail; 5873 } 5874 5875 //===----------------------------------------------------------------------===// 5876 // MTBUF format 5877 //===----------------------------------------------------------------------===// 5878 5879 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5880 int64_t MaxVal, 5881 int64_t &Fmt) { 5882 int64_t Val; 5883 SMLoc Loc = getLoc(); 5884 5885 auto Res = parseIntWithPrefix(Pref, Val); 5886 if (Res == MatchOperand_ParseFail) 5887 return false; 5888 if (Res == MatchOperand_NoMatch) 5889 return true; 5890 5891 if (Val < 0 || Val > MaxVal) { 5892 Error(Loc, Twine("out of range ", StringRef(Pref))); 5893 return false; 5894 } 5895 5896 Fmt = Val; 5897 return true; 5898 } 5899 5900 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5901 // values to live in a joint format operand in the MCInst encoding. 5902 OperandMatchResultTy 5903 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5904 using namespace llvm::AMDGPU::MTBUFFormat; 5905 5906 int64_t Dfmt = DFMT_UNDEF; 5907 int64_t Nfmt = NFMT_UNDEF; 5908 5909 // dfmt and nfmt can appear in either order, and each is optional. 5910 for (int I = 0; I < 2; ++I) { 5911 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5912 return MatchOperand_ParseFail; 5913 5914 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5915 return MatchOperand_ParseFail; 5916 } 5917 // Skip optional comma between dfmt/nfmt 5918 // but guard against 2 commas following each other. 5919 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5920 !peekToken().is(AsmToken::Comma)) { 5921 trySkipToken(AsmToken::Comma); 5922 } 5923 } 5924 5925 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5926 return MatchOperand_NoMatch; 5927 5928 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5929 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5930 5931 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5932 return MatchOperand_Success; 5933 } 5934 5935 OperandMatchResultTy 5936 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5937 using namespace llvm::AMDGPU::MTBUFFormat; 5938 5939 int64_t Fmt = UFMT_UNDEF; 5940 5941 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5942 return MatchOperand_ParseFail; 5943 5944 if (Fmt == UFMT_UNDEF) 5945 return MatchOperand_NoMatch; 5946 5947 Format = Fmt; 5948 return MatchOperand_Success; 5949 } 5950 5951 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5952 int64_t &Nfmt, 5953 StringRef FormatStr, 5954 SMLoc Loc) { 5955 using namespace llvm::AMDGPU::MTBUFFormat; 5956 int64_t Format; 5957 5958 Format = getDfmt(FormatStr); 5959 if (Format != DFMT_UNDEF) { 5960 Dfmt = Format; 5961 return true; 5962 } 5963 5964 Format = getNfmt(FormatStr, getSTI()); 5965 if (Format != NFMT_UNDEF) { 5966 Nfmt = Format; 5967 return true; 5968 } 5969 5970 Error(Loc, "unsupported format"); 5971 return false; 5972 } 5973 5974 OperandMatchResultTy 5975 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5976 SMLoc FormatLoc, 5977 int64_t &Format) { 5978 using namespace llvm::AMDGPU::MTBUFFormat; 5979 5980 int64_t Dfmt = DFMT_UNDEF; 5981 int64_t Nfmt = NFMT_UNDEF; 5982 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5983 return MatchOperand_ParseFail; 5984 5985 if (trySkipToken(AsmToken::Comma)) { 5986 StringRef Str; 5987 SMLoc Loc = getLoc(); 5988 if (!parseId(Str, "expected a format string") || 5989 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5990 return MatchOperand_ParseFail; 5991 } 5992 if (Dfmt == DFMT_UNDEF) { 5993 Error(Loc, "duplicate numeric format"); 5994 return MatchOperand_ParseFail; 5995 } else if (Nfmt == NFMT_UNDEF) { 5996 Error(Loc, "duplicate data format"); 5997 return MatchOperand_ParseFail; 5998 } 5999 } 6000 6001 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 6002 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 6003 6004 if (isGFX10Plus()) { 6005 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt, getSTI()); 6006 if (Ufmt == UFMT_UNDEF) { 6007 Error(FormatLoc, "unsupported format"); 6008 return MatchOperand_ParseFail; 6009 } 6010 Format = Ufmt; 6011 } else { 6012 Format = encodeDfmtNfmt(Dfmt, Nfmt); 6013 } 6014 6015 return MatchOperand_Success; 6016 } 6017 6018 OperandMatchResultTy 6019 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 6020 SMLoc Loc, 6021 int64_t &Format) { 6022 using namespace llvm::AMDGPU::MTBUFFormat; 6023 6024 auto Id = getUnifiedFormat(FormatStr, getSTI()); 6025 if (Id == UFMT_UNDEF) 6026 return MatchOperand_NoMatch; 6027 6028 if (!isGFX10Plus()) { 6029 Error(Loc, "unified format is not supported on this GPU"); 6030 return MatchOperand_ParseFail; 6031 } 6032 6033 Format = Id; 6034 return MatchOperand_Success; 6035 } 6036 6037 OperandMatchResultTy 6038 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 6039 using namespace llvm::AMDGPU::MTBUFFormat; 6040 SMLoc Loc = getLoc(); 6041 6042 if (!parseExpr(Format)) 6043 return MatchOperand_ParseFail; 6044 if (!isValidFormatEncoding(Format, getSTI())) { 6045 Error(Loc, "out of range format"); 6046 return MatchOperand_ParseFail; 6047 } 6048 6049 return MatchOperand_Success; 6050 } 6051 6052 OperandMatchResultTy 6053 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 6054 using namespace llvm::AMDGPU::MTBUFFormat; 6055 6056 if (!trySkipId("format", AsmToken::Colon)) 6057 return MatchOperand_NoMatch; 6058 6059 if (trySkipToken(AsmToken::LBrac)) { 6060 StringRef FormatStr; 6061 SMLoc Loc = getLoc(); 6062 if (!parseId(FormatStr, "expected a format string")) 6063 return MatchOperand_ParseFail; 6064 6065 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 6066 if (Res == MatchOperand_NoMatch) 6067 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 6068 if (Res != MatchOperand_Success) 6069 return Res; 6070 6071 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 6072 return MatchOperand_ParseFail; 6073 6074 return MatchOperand_Success; 6075 } 6076 6077 return parseNumericFormat(Format); 6078 } 6079 6080 OperandMatchResultTy 6081 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 6082 using namespace llvm::AMDGPU::MTBUFFormat; 6083 6084 int64_t Format = getDefaultFormatEncoding(getSTI()); 6085 OperandMatchResultTy Res; 6086 SMLoc Loc = getLoc(); 6087 6088 // Parse legacy format syntax. 6089 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 6090 if (Res == MatchOperand_ParseFail) 6091 return Res; 6092 6093 bool FormatFound = (Res == MatchOperand_Success); 6094 6095 Operands.push_back( 6096 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 6097 6098 if (FormatFound) 6099 trySkipToken(AsmToken::Comma); 6100 6101 if (isToken(AsmToken::EndOfStatement)) { 6102 // We are expecting an soffset operand, 6103 // but let matcher handle the error. 6104 return MatchOperand_Success; 6105 } 6106 6107 // Parse soffset. 
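  // (For illustration, assuming legacy MTBUF syntax along the lines of
  //  "tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0", this would
  //  be the trailing "0"; the exact instruction shape is an assumption, not
  //  taken from this file.)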
6108 Res = parseRegOrImm(Operands); 6109 if (Res != MatchOperand_Success) 6110 return Res; 6111 6112 trySkipToken(AsmToken::Comma); 6113 6114 if (!FormatFound) { 6115 Res = parseSymbolicOrNumericFormat(Format); 6116 if (Res == MatchOperand_ParseFail) 6117 return Res; 6118 if (Res == MatchOperand_Success) { 6119 auto Size = Operands.size(); 6120 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 6121 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 6122 Op.setImm(Format); 6123 } 6124 return MatchOperand_Success; 6125 } 6126 6127 if (isId("format") && peekToken().is(AsmToken::Colon)) { 6128 Error(getLoc(), "duplicate format"); 6129 return MatchOperand_ParseFail; 6130 } 6131 return MatchOperand_Success; 6132 } 6133 6134 //===----------------------------------------------------------------------===// 6135 // ds 6136 //===----------------------------------------------------------------------===// 6137 6138 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 6139 const OperandVector &Operands) { 6140 OptionalImmIndexMap OptionalIdx; 6141 6142 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6143 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6144 6145 // Add the register arguments 6146 if (Op.isReg()) { 6147 Op.addRegOperands(Inst, 1); 6148 continue; 6149 } 6150 6151 // Handle optional arguments 6152 OptionalIdx[Op.getImmTy()] = i; 6153 } 6154 6155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 6156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 6157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6158 6159 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6160 } 6161 6162 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 6163 bool IsGdsHardcoded) { 6164 OptionalImmIndexMap OptionalIdx; 6165 6166 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6167 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6168 6169 // Add the register arguments 6170 if (Op.isReg()) { 6171 Op.addRegOperands(Inst, 1); 6172 continue; 6173 } 6174 6175 if (Op.isToken() && Op.getToken() == "gds") { 6176 IsGdsHardcoded = true; 6177 continue; 6178 } 6179 6180 // Handle optional arguments 6181 OptionalIdx[Op.getImmTy()] = i; 6182 } 6183 6184 AMDGPUOperand::ImmTy OffsetType = 6185 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 6186 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 6187 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 6188 AMDGPUOperand::ImmTyOffset; 6189 6190 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 6191 6192 if (!IsGdsHardcoded) { 6193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 6194 } 6195 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 6196 } 6197 6198 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 6199 OptionalImmIndexMap OptionalIdx; 6200 6201 unsigned OperandIdx[4]; 6202 unsigned EnMask = 0; 6203 int SrcIdx = 0; 6204 6205 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6206 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6207 6208 // Add the register arguments 6209 if (Op.isReg()) { 6210 assert(SrcIdx < 4); 6211 OperandIdx[SrcIdx] = Inst.size(); 6212 Op.addRegOperands(Inst, 1); 6213 ++SrcIdx; 6214 continue; 6215 } 6216 6217 if (Op.isOff()) { 6218 assert(SrcIdx < 4); 6219 OperandIdx[SrcIdx] = Inst.size(); 6220 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 6221 ++SrcIdx; 6222 continue; 6223 } 6224 6225 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 6226 Op.addImmOperands(Inst, 1); 6227 continue; 6228 } 6229 6230 if (Op.isToken() && Op.getToken() == "done") 6231 continue; 6232 6233 // Handle optional arguments 6234 OptionalIdx[Op.getImmTy()] = i; 6235 } 6236 6237 assert(SrcIdx == 4); 6238 6239 bool Compr = false; 6240 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6241 Compr = true; 6242 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6243 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6244 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6245 } 6246 6247 for (auto i = 0; i < SrcIdx; ++i) { 6248 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6249 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6250 } 6251 } 6252 6253 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6254 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6255 6256 Inst.addOperand(MCOperand::createImm(EnMask)); 6257 } 6258 6259 //===----------------------------------------------------------------------===// 6260 // s_waitcnt 6261 //===----------------------------------------------------------------------===// 6262 6263 static bool 6264 encodeCnt( 6265 const AMDGPU::IsaVersion ISA, 6266 int64_t &IntVal, 6267 int64_t CntVal, 6268 bool Saturate, 6269 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6270 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6271 { 6272 bool Failed = false; 6273 6274 IntVal = encode(ISA, IntVal, CntVal); 6275 if (CntVal != decode(ISA, IntVal)) { 6276 if (Saturate) { 6277 IntVal = encode(ISA, IntVal, -1); 6278 } else { 6279 Failed = true; 6280 } 6281 } 6282 return Failed; 6283 } 6284 6285 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6286 6287 SMLoc CntLoc = getLoc(); 6288 StringRef CntName = getTokenStr(); 6289 6290 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6291 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6292 return false; 6293 6294 int64_t CntVal; 6295 SMLoc ValLoc = getLoc(); 6296 if (!parseExpr(CntVal)) 6297 return false; 6298 6299 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6300 6301 bool Failed = true; 6302 bool Sat = CntName.endswith("_sat"); 6303 6304 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6305 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6306 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6307 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6308 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6309 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6310 } else { 6311 Error(CntLoc, "invalid counter name " + CntName); 6312 return false; 6313 } 6314 6315 if (Failed) { 6316 Error(ValLoc, "too large value for " + CntName); 6317 return false; 6318 } 6319 6320 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6321 return false; 6322 6323 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6324 if (isToken(AsmToken::EndOfStatement)) { 6325 Error(getLoc(), "expected a counter name"); 6326 return false; 6327 } 6328 } 6329 6330 return true; 6331 } 6332 6333 OperandMatchResultTy 6334 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6335 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6336 int64_t Waitcnt = getWaitcntBitMask(ISA); 6337 SMLoc S = getLoc(); 6338 6339 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6340 while (!isToken(AsmToken::EndOfStatement)) { 6341 if (!parseCnt(Waitcnt)) 6342 return MatchOperand_ParseFail; 6343 } 6344 } else { 6345 if (!parseExpr(Waitcnt)) 6346 return MatchOperand_ParseFail; 6347 } 6348 6349 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6350 return MatchOperand_Success; 6351 } 6352 6353 bool 6354 AMDGPUOperand::isSWaitCnt() const { 6355 return isImm(); 6356 } 6357 6358 //===----------------------------------------------------------------------===// 6359 // DepCtr 6360 //===----------------------------------------------------------------------===// 6361 6362 void AMDGPUAsmParser::depCtrError(SMLoc Loc, int ErrorId, 6363 StringRef DepCtrName) { 6364 switch 
(ErrorId) { 6365 case OPR_ID_UNKNOWN: 6366 Error(Loc, Twine("invalid counter name ", DepCtrName)); 6367 return; 6368 case OPR_ID_UNSUPPORTED: 6369 Error(Loc, Twine(DepCtrName, " is not supported on this GPU")); 6370 return; 6371 case OPR_ID_DUPLICATE: 6372 Error(Loc, Twine("duplicate counter name ", DepCtrName)); 6373 return; 6374 case OPR_VAL_INVALID: 6375 Error(Loc, Twine("invalid value for ", DepCtrName)); 6376 return; 6377 default: 6378 assert(false); 6379 } 6380 } 6381 6382 bool AMDGPUAsmParser::parseDepCtr(int64_t &DepCtr, unsigned &UsedOprMask) { 6383 6384 using namespace llvm::AMDGPU::DepCtr; 6385 6386 SMLoc DepCtrLoc = getLoc(); 6387 StringRef DepCtrName = getTokenStr(); 6388 6389 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6390 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6391 return false; 6392 6393 int64_t ExprVal; 6394 if (!parseExpr(ExprVal)) 6395 return false; 6396 6397 unsigned PrevOprMask = UsedOprMask; 6398 int CntVal = encodeDepCtr(DepCtrName, ExprVal, UsedOprMask, getSTI()); 6399 6400 if (CntVal < 0) { 6401 depCtrError(DepCtrLoc, CntVal, DepCtrName); 6402 return false; 6403 } 6404 6405 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6406 return false; 6407 6408 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6409 if (isToken(AsmToken::EndOfStatement)) { 6410 Error(getLoc(), "expected a counter name"); 6411 return false; 6412 } 6413 } 6414 6415 unsigned CntValMask = PrevOprMask ^ UsedOprMask; 6416 DepCtr = (DepCtr & ~CntValMask) | CntVal; 6417 return true; 6418 } 6419 6420 OperandMatchResultTy AMDGPUAsmParser::parseDepCtrOps(OperandVector &Operands) { 6421 using namespace llvm::AMDGPU::DepCtr; 6422 6423 int64_t DepCtr = getDefaultDepCtrEncoding(getSTI()); 6424 SMLoc Loc = getLoc(); 6425 6426 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6427 unsigned UsedOprMask = 0; 6428 while (!isToken(AsmToken::EndOfStatement)) { 6429 if (!parseDepCtr(DepCtr, UsedOprMask)) 6430 return MatchOperand_ParseFail; 6431 } 6432 } else { 6433 if (!parseExpr(DepCtr)) 6434 return MatchOperand_ParseFail; 6435 } 6436 6437 Operands.push_back(AMDGPUOperand::CreateImm(this, DepCtr, Loc)); 6438 return MatchOperand_Success; 6439 } 6440 6441 bool AMDGPUOperand::isDepCtr() const { return isS16Imm(); } 6442 6443 //===----------------------------------------------------------------------===// 6444 // hwreg 6445 //===----------------------------------------------------------------------===// 6446 6447 bool 6448 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6449 OperandInfoTy &Offset, 6450 OperandInfoTy &Width) { 6451 using namespace llvm::AMDGPU::Hwreg; 6452 6453 // The register may be specified by name or using a numeric code 6454 HwReg.Loc = getLoc(); 6455 if (isToken(AsmToken::Identifier) && 6456 (HwReg.Id = getHwregId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6457 HwReg.IsSymbolic = true; 6458 lex(); // skip register name 6459 } else if (!parseExpr(HwReg.Id, "a register name")) { 6460 return false; 6461 } 6462 6463 if (trySkipToken(AsmToken::RParen)) 6464 return true; 6465 6466 // parse optional params 6467 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6468 return false; 6469 6470 Offset.Loc = getLoc(); 6471 if (!parseExpr(Offset.Id)) 6472 return false; 6473 6474 if (!skipToken(AsmToken::Comma, "expected a comma")) 6475 return false; 6476 6477 Width.Loc = getLoc(); 6478 return parseExpr(Width.Id) && 6479 skipToken(AsmToken::RParen, "expected a closing 
parenthesis"); 6480 } 6481 6482 bool 6483 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6484 const OperandInfoTy &Offset, 6485 const OperandInfoTy &Width) { 6486 6487 using namespace llvm::AMDGPU::Hwreg; 6488 6489 if (HwReg.IsSymbolic) { 6490 if (HwReg.Id == OPR_ID_UNSUPPORTED) { 6491 Error(HwReg.Loc, 6492 "specified hardware register is not supported on this GPU"); 6493 return false; 6494 } 6495 } else { 6496 if (!isValidHwreg(HwReg.Id)) { 6497 Error(HwReg.Loc, 6498 "invalid code of hardware register: only 6-bit values are legal"); 6499 return false; 6500 } 6501 } 6502 if (!isValidHwregOffset(Offset.Id)) { 6503 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6504 return false; 6505 } 6506 if (!isValidHwregWidth(Width.Id)) { 6507 Error(Width.Loc, 6508 "invalid bitfield width: only values from 1 to 32 are legal"); 6509 return false; 6510 } 6511 return true; 6512 } 6513 6514 OperandMatchResultTy 6515 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6516 using namespace llvm::AMDGPU::Hwreg; 6517 6518 int64_t ImmVal = 0; 6519 SMLoc Loc = getLoc(); 6520 6521 if (trySkipId("hwreg", AsmToken::LParen)) { 6522 OperandInfoTy HwReg(OPR_ID_UNKNOWN); 6523 OperandInfoTy Offset(OFFSET_DEFAULT_); 6524 OperandInfoTy Width(WIDTH_DEFAULT_); 6525 if (parseHwregBody(HwReg, Offset, Width) && 6526 validateHwreg(HwReg, Offset, Width)) { 6527 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6528 } else { 6529 return MatchOperand_ParseFail; 6530 } 6531 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6532 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6533 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6534 return MatchOperand_ParseFail; 6535 } 6536 } else { 6537 return MatchOperand_ParseFail; 6538 } 6539 6540 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6541 return MatchOperand_Success; 6542 } 6543 6544 bool AMDGPUOperand::isHwreg() const { 6545 return isImmTy(ImmTyHwreg); 6546 } 6547 6548 //===----------------------------------------------------------------------===// 6549 // sendmsg 6550 //===----------------------------------------------------------------------===// 6551 6552 bool 6553 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6554 OperandInfoTy &Op, 6555 OperandInfoTy &Stream) { 6556 using namespace llvm::AMDGPU::SendMsg; 6557 6558 Msg.Loc = getLoc(); 6559 if (isToken(AsmToken::Identifier) && 6560 (Msg.Id = getMsgId(getTokenStr(), getSTI())) != OPR_ID_UNKNOWN) { 6561 Msg.IsSymbolic = true; 6562 lex(); // skip message name 6563 } else if (!parseExpr(Msg.Id, "a message name")) { 6564 return false; 6565 } 6566 6567 if (trySkipToken(AsmToken::Comma)) { 6568 Op.IsDefined = true; 6569 Op.Loc = getLoc(); 6570 if (isToken(AsmToken::Identifier) && 6571 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6572 lex(); // skip operation name 6573 } else if (!parseExpr(Op.Id, "an operation name")) { 6574 return false; 6575 } 6576 6577 if (trySkipToken(AsmToken::Comma)) { 6578 Stream.IsDefined = true; 6579 Stream.Loc = getLoc(); 6580 if (!parseExpr(Stream.Id)) 6581 return false; 6582 } 6583 } 6584 6585 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6586 } 6587 6588 bool 6589 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6590 const OperandInfoTy &Op, 6591 const OperandInfoTy &Stream) { 6592 using namespace llvm::AMDGPU::SendMsg; 6593 6594 // Validation strictness depends on whether message is specified 6595 // in a symbolic or in a numeric form. 
In the latter case 6596 // only encoding possibility is checked. 6597 bool Strict = Msg.IsSymbolic; 6598 6599 if (Strict) { 6600 if (Msg.Id == OPR_ID_UNSUPPORTED) { 6601 Error(Msg.Loc, "specified message id is not supported on this GPU"); 6602 return false; 6603 } 6604 } else { 6605 if (!isValidMsgId(Msg.Id)) { 6606 Error(Msg.Loc, "invalid message id"); 6607 return false; 6608 } 6609 } 6610 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6611 if (Op.IsDefined) { 6612 Error(Op.Loc, "message does not support operations"); 6613 } else { 6614 Error(Msg.Loc, "missing message operation"); 6615 } 6616 return false; 6617 } 6618 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6619 Error(Op.Loc, "invalid operation id"); 6620 return false; 6621 } 6622 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6623 Error(Stream.Loc, "message operation does not support streams"); 6624 return false; 6625 } 6626 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6627 Error(Stream.Loc, "invalid message stream id"); 6628 return false; 6629 } 6630 return true; 6631 } 6632 6633 OperandMatchResultTy 6634 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6635 using namespace llvm::AMDGPU::SendMsg; 6636 6637 int64_t ImmVal = 0; 6638 SMLoc Loc = getLoc(); 6639 6640 if (trySkipId("sendmsg", AsmToken::LParen)) { 6641 OperandInfoTy Msg(OPR_ID_UNKNOWN); 6642 OperandInfoTy Op(OP_NONE_); 6643 OperandInfoTy Stream(STREAM_ID_NONE_); 6644 if (parseSendMsgBody(Msg, Op, Stream) && 6645 validateSendMsg(Msg, Op, Stream)) { 6646 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6647 } else { 6648 return MatchOperand_ParseFail; 6649 } 6650 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6651 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6652 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6653 return MatchOperand_ParseFail; 6654 } 6655 } else { 6656 return MatchOperand_ParseFail; 6657 } 6658 6659 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6660 return MatchOperand_Success; 6661 } 6662 6663 bool AMDGPUOperand::isSendMsg() const { 6664 return isImmTy(ImmTySendMsg); 6665 } 6666 6667 //===----------------------------------------------------------------------===// 6668 // v_interp 6669 //===----------------------------------------------------------------------===// 6670 6671 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6672 StringRef Str; 6673 SMLoc S = getLoc(); 6674 6675 if (!parseId(Str)) 6676 return MatchOperand_NoMatch; 6677 6678 int Slot = StringSwitch<int>(Str) 6679 .Case("p10", 0) 6680 .Case("p20", 1) 6681 .Case("p0", 2) 6682 .Default(-1); 6683 6684 if (Slot == -1) { 6685 Error(S, "invalid interpolation slot"); 6686 return MatchOperand_ParseFail; 6687 } 6688 6689 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6690 AMDGPUOperand::ImmTyInterpSlot)); 6691 return MatchOperand_Success; 6692 } 6693 6694 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6695 StringRef Str; 6696 SMLoc S = getLoc(); 6697 6698 if (!parseId(Str)) 6699 return MatchOperand_NoMatch; 6700 6701 if (!Str.startswith("attr")) { 6702 Error(S, "invalid interpolation attribute"); 6703 return MatchOperand_ParseFail; 6704 } 6705 6706 StringRef Chan = Str.take_back(2); 6707 int AttrChan = StringSwitch<int>(Chan) 6708 .Case(".x", 0) 6709 .Case(".y", 1) 6710 .Case(".z", 2) 6711 .Case(".w", 3) 6712 .Default(-1); 6713 if (AttrChan == -1) { 6714 Error(S, "invalid or missing 
interpolation attribute channel"); 6715 return MatchOperand_ParseFail; 6716 } 6717 6718 Str = Str.drop_back(2).drop_front(4); 6719 6720 uint8_t Attr; 6721 if (Str.getAsInteger(10, Attr)) { 6722 Error(S, "invalid or missing interpolation attribute number"); 6723 return MatchOperand_ParseFail; 6724 } 6725 6726 if (Attr > 63) { 6727 Error(S, "out of bounds interpolation attribute number"); 6728 return MatchOperand_ParseFail; 6729 } 6730 6731 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6732 6733 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6734 AMDGPUOperand::ImmTyInterpAttr)); 6735 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6736 AMDGPUOperand::ImmTyAttrChan)); 6737 return MatchOperand_Success; 6738 } 6739 6740 //===----------------------------------------------------------------------===// 6741 // exp 6742 //===----------------------------------------------------------------------===// 6743 6744 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6745 using namespace llvm::AMDGPU::Exp; 6746 6747 StringRef Str; 6748 SMLoc S = getLoc(); 6749 6750 if (!parseId(Str)) 6751 return MatchOperand_NoMatch; 6752 6753 unsigned Id = getTgtId(Str); 6754 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6755 Error(S, (Id == ET_INVALID) ? 6756 "invalid exp target" : 6757 "exp target is not supported on this GPU"); 6758 return MatchOperand_ParseFail; 6759 } 6760 6761 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6762 AMDGPUOperand::ImmTyExpTgt)); 6763 return MatchOperand_Success; 6764 } 6765 6766 //===----------------------------------------------------------------------===// 6767 // parser helpers 6768 //===----------------------------------------------------------------------===// 6769 6770 bool 6771 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6772 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6773 } 6774 6775 bool 6776 AMDGPUAsmParser::isId(const StringRef Id) const { 6777 return isId(getToken(), Id); 6778 } 6779 6780 bool 6781 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6782 return getTokenKind() == Kind; 6783 } 6784 6785 bool 6786 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6787 if (isId(Id)) { 6788 lex(); 6789 return true; 6790 } 6791 return false; 6792 } 6793 6794 bool 6795 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6796 if (isToken(AsmToken::Identifier)) { 6797 StringRef Tok = getTokenStr(); 6798 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6799 lex(); 6800 return true; 6801 } 6802 } 6803 return false; 6804 } 6805 6806 bool 6807 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6808 if (isId(Id) && peekToken().is(Kind)) { 6809 lex(); 6810 lex(); 6811 return true; 6812 } 6813 return false; 6814 } 6815 6816 bool 6817 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6818 if (isToken(Kind)) { 6819 lex(); 6820 return true; 6821 } 6822 return false; 6823 } 6824 6825 bool 6826 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6827 const StringRef ErrMsg) { 6828 if (!trySkipToken(Kind)) { 6829 Error(getLoc(), ErrMsg); 6830 return false; 6831 } 6832 return true; 6833 } 6834 6835 bool 6836 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6837 SMLoc S = getLoc(); 6838 6839 const MCExpr *Expr; 6840 if (Parser.parseExpression(Expr)) 6841 return false; 6842 6843 if (Expr->evaluateAsAbsolute(Imm)) 6844 return true; 6845 6846 if 
(Expected.empty()) { 6847 Error(S, "expected absolute expression"); 6848 } else { 6849 Error(S, Twine("expected ", Expected) + 6850 Twine(" or an absolute expression")); 6851 } 6852 return false; 6853 } 6854 6855 bool 6856 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6857 SMLoc S = getLoc(); 6858 6859 const MCExpr *Expr; 6860 if (Parser.parseExpression(Expr)) 6861 return false; 6862 6863 int64_t IntVal; 6864 if (Expr->evaluateAsAbsolute(IntVal)) { 6865 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6866 } else { 6867 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6868 } 6869 return true; 6870 } 6871 6872 bool 6873 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6874 if (isToken(AsmToken::String)) { 6875 Val = getToken().getStringContents(); 6876 lex(); 6877 return true; 6878 } else { 6879 Error(getLoc(), ErrMsg); 6880 return false; 6881 } 6882 } 6883 6884 bool 6885 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6886 if (isToken(AsmToken::Identifier)) { 6887 Val = getTokenStr(); 6888 lex(); 6889 return true; 6890 } else { 6891 if (!ErrMsg.empty()) 6892 Error(getLoc(), ErrMsg); 6893 return false; 6894 } 6895 } 6896 6897 AsmToken 6898 AMDGPUAsmParser::getToken() const { 6899 return Parser.getTok(); 6900 } 6901 6902 AsmToken 6903 AMDGPUAsmParser::peekToken() { 6904 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6905 } 6906 6907 void 6908 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6909 auto TokCount = getLexer().peekTokens(Tokens); 6910 6911 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6912 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6913 } 6914 6915 AsmToken::TokenKind 6916 AMDGPUAsmParser::getTokenKind() const { 6917 return getLexer().getKind(); 6918 } 6919 6920 SMLoc 6921 AMDGPUAsmParser::getLoc() const { 6922 return getToken().getLoc(); 6923 } 6924 6925 StringRef 6926 AMDGPUAsmParser::getTokenStr() const { 6927 return getToken().getString(); 6928 } 6929 6930 void 6931 AMDGPUAsmParser::lex() { 6932 Parser.Lex(); 6933 } 6934 6935 SMLoc 6936 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6937 const OperandVector &Operands) const { 6938 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6939 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6940 if (Test(Op)) 6941 return Op.getStartLoc(); 6942 } 6943 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6944 } 6945 6946 SMLoc 6947 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6948 const OperandVector &Operands) const { 6949 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6950 return getOperandLoc(Test, Operands); 6951 } 6952 6953 SMLoc 6954 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6955 const OperandVector &Operands) const { 6956 auto Test = [=](const AMDGPUOperand& Op) { 6957 return Op.isRegKind() && Op.getReg() == Reg; 6958 }; 6959 return getOperandLoc(Test, Operands); 6960 } 6961 6962 SMLoc 6963 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6964 auto Test = [](const AMDGPUOperand& Op) { 6965 return Op.IsImmKindLiteral() || Op.isExpr(); 6966 }; 6967 return getOperandLoc(Test, Operands); 6968 } 6969 6970 SMLoc 6971 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6972 auto Test = [](const AMDGPUOperand& Op) { 6973 return Op.isImmKindConst(); 6974 }; 6975 return getOperandLoc(Test, Operands); 6976 } 6977 6978 //===----------------------------------------------------------------------===// 6979 // 
swizzle 6980 //===----------------------------------------------------------------------===// 6981 6982 LLVM_READNONE 6983 static unsigned 6984 encodeBitmaskPerm(const unsigned AndMask, 6985 const unsigned OrMask, 6986 const unsigned XorMask) { 6987 using namespace llvm::AMDGPU::Swizzle; 6988 6989 return BITMASK_PERM_ENC | 6990 (AndMask << BITMASK_AND_SHIFT) | 6991 (OrMask << BITMASK_OR_SHIFT) | 6992 (XorMask << BITMASK_XOR_SHIFT); 6993 } 6994 6995 bool 6996 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6997 const unsigned MinVal, 6998 const unsigned MaxVal, 6999 const StringRef ErrMsg, 7000 SMLoc &Loc) { 7001 if (!skipToken(AsmToken::Comma, "expected a comma")) { 7002 return false; 7003 } 7004 Loc = getLoc(); 7005 if (!parseExpr(Op)) { 7006 return false; 7007 } 7008 if (Op < MinVal || Op > MaxVal) { 7009 Error(Loc, ErrMsg); 7010 return false; 7011 } 7012 7013 return true; 7014 } 7015 7016 bool 7017 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 7018 const unsigned MinVal, 7019 const unsigned MaxVal, 7020 const StringRef ErrMsg) { 7021 SMLoc Loc; 7022 for (unsigned i = 0; i < OpNum; ++i) { 7023 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 7024 return false; 7025 } 7026 7027 return true; 7028 } 7029 7030 bool 7031 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 7032 using namespace llvm::AMDGPU::Swizzle; 7033 7034 int64_t Lane[LANE_NUM]; 7035 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 7036 "expected a 2-bit lane id")) { 7037 Imm = QUAD_PERM_ENC; 7038 for (unsigned I = 0; I < LANE_NUM; ++I) { 7039 Imm |= Lane[I] << (LANE_SHIFT * I); 7040 } 7041 return true; 7042 } 7043 return false; 7044 } 7045 7046 bool 7047 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 7048 using namespace llvm::AMDGPU::Swizzle; 7049 7050 SMLoc Loc; 7051 int64_t GroupSize; 7052 int64_t LaneIdx; 7053 7054 if (!parseSwizzleOperand(GroupSize, 7055 2, 32, 7056 "group size must be in the interval [2,32]", 7057 Loc)) { 7058 return false; 7059 } 7060 if (!isPowerOf2_64(GroupSize)) { 7061 Error(Loc, "group size must be a power of two"); 7062 return false; 7063 } 7064 if (parseSwizzleOperand(LaneIdx, 7065 0, GroupSize - 1, 7066 "lane id must be in the interval [0,group size - 1]", 7067 Loc)) { 7068 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 7069 return true; 7070 } 7071 return false; 7072 } 7073 7074 bool 7075 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 7076 using namespace llvm::AMDGPU::Swizzle; 7077 7078 SMLoc Loc; 7079 int64_t GroupSize; 7080 7081 if (!parseSwizzleOperand(GroupSize, 7082 2, 32, 7083 "group size must be in the interval [2,32]", 7084 Loc)) { 7085 return false; 7086 } 7087 if (!isPowerOf2_64(GroupSize)) { 7088 Error(Loc, "group size must be a power of two"); 7089 return false; 7090 } 7091 7092 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 7093 return true; 7094 } 7095 7096 bool 7097 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 7098 using namespace llvm::AMDGPU::Swizzle; 7099 7100 SMLoc Loc; 7101 int64_t GroupSize; 7102 7103 if (!parseSwizzleOperand(GroupSize, 7104 1, 16, 7105 "group size must be in the interval [1,16]", 7106 Loc)) { 7107 return false; 7108 } 7109 if (!isPowerOf2_64(GroupSize)) { 7110 Error(Loc, "group size must be a power of two"); 7111 return false; 7112 } 7113 7114 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 7115 return true; 7116 } 7117 7118 bool 7119 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 7120 using namespace llvm::AMDGPU::Swizzle; 7121 7122 if 
(!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parenthesis")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis");
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
7231 return parseOptionalOpr(Operands); 7232 } 7233 } 7234 7235 bool 7236 AMDGPUOperand::isSwizzle() const { 7237 return isImmTy(ImmTySwizzle); 7238 } 7239 7240 //===----------------------------------------------------------------------===// 7241 // VGPR Index Mode 7242 //===----------------------------------------------------------------------===// 7243 7244 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 7245 7246 using namespace llvm::AMDGPU::VGPRIndexMode; 7247 7248 if (trySkipToken(AsmToken::RParen)) { 7249 return OFF; 7250 } 7251 7252 int64_t Imm = 0; 7253 7254 while (true) { 7255 unsigned Mode = 0; 7256 SMLoc S = getLoc(); 7257 7258 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 7259 if (trySkipId(IdSymbolic[ModeId])) { 7260 Mode = 1 << ModeId; 7261 break; 7262 } 7263 } 7264 7265 if (Mode == 0) { 7266 Error(S, (Imm == 0)? 7267 "expected a VGPR index mode or a closing parenthesis" : 7268 "expected a VGPR index mode"); 7269 return UNDEF; 7270 } 7271 7272 if (Imm & Mode) { 7273 Error(S, "duplicate VGPR index mode"); 7274 return UNDEF; 7275 } 7276 Imm |= Mode; 7277 7278 if (trySkipToken(AsmToken::RParen)) 7279 break; 7280 if (!skipToken(AsmToken::Comma, 7281 "expected a comma or a closing parenthesis")) 7282 return UNDEF; 7283 } 7284 7285 return Imm; 7286 } 7287 7288 OperandMatchResultTy 7289 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 7290 7291 using namespace llvm::AMDGPU::VGPRIndexMode; 7292 7293 int64_t Imm = 0; 7294 SMLoc S = getLoc(); 7295 7296 if (trySkipId("gpr_idx", AsmToken::LParen)) { 7297 Imm = parseGPRIdxMacro(); 7298 if (Imm == UNDEF) 7299 return MatchOperand_ParseFail; 7300 } else { 7301 if (getParser().parseAbsoluteExpression(Imm)) 7302 return MatchOperand_ParseFail; 7303 if (Imm < 0 || !isUInt<4>(Imm)) { 7304 Error(S, "invalid immediate: only 4-bit values are legal"); 7305 return MatchOperand_ParseFail; 7306 } 7307 } 7308 7309 Operands.push_back( 7310 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 7311 return MatchOperand_Success; 7312 } 7313 7314 bool AMDGPUOperand::isGPRIdxMode() const { 7315 return isImmTy(ImmTyGprIdxMode); 7316 } 7317 7318 //===----------------------------------------------------------------------===// 7319 // sopp branch targets 7320 //===----------------------------------------------------------------------===// 7321 7322 OperandMatchResultTy 7323 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 7324 7325 // Make sure we are not parsing something 7326 // that looks like a label or an expression but is not. 7327 // This will improve error messages. 7328 if (isRegister() || isModifier()) 7329 return MatchOperand_NoMatch; 7330 7331 if (!parseExpr(Operands)) 7332 return MatchOperand_ParseFail; 7333 7334 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7335 assert(Opr.isImm() || Opr.isExpr()); 7336 SMLoc Loc = Opr.getStartLoc(); 7337 7338 // Currently we do not support arbitrary expressions as branch targets. 7339 // Only labels and absolute expressions are accepted. 
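// For example (illustrative): 's_branch loop_end' and 's_branch 8' are accepted,
// while an expression such as 's_branch loop_end+4' is rejected by the check below.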
7340 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7341 Error(Loc, "expected an absolute expression or a label"); 7342 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7343 Error(Loc, "expected a 16-bit signed jump offset"); 7344 } 7345 7346 return MatchOperand_Success; 7347 } 7348 7349 //===----------------------------------------------------------------------===// 7350 // Boolean holding registers 7351 //===----------------------------------------------------------------------===// 7352 7353 OperandMatchResultTy 7354 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7355 return parseReg(Operands); 7356 } 7357 7358 //===----------------------------------------------------------------------===// 7359 // mubuf 7360 //===----------------------------------------------------------------------===// 7361 7362 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7363 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7364 } 7365 7366 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7367 const OperandVector &Operands, 7368 bool IsAtomic, 7369 bool IsLds) { 7370 OptionalImmIndexMap OptionalIdx; 7371 unsigned FirstOperandIdx = 1; 7372 bool IsAtomicReturn = false; 7373 7374 if (IsAtomic) { 7375 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7376 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7377 if (!Op.isCPol()) 7378 continue; 7379 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7380 break; 7381 } 7382 7383 if (!IsAtomicReturn) { 7384 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7385 if (NewOpc != -1) 7386 Inst.setOpcode(NewOpc); 7387 } 7388 7389 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7390 SIInstrFlags::IsAtomicRet; 7391 } 7392 7393 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7394 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7395 7396 // Add the register arguments 7397 if (Op.isReg()) { 7398 Op.addRegOperands(Inst, 1); 7399 // Insert a tied src for atomic return dst. 7400 // This cannot be postponed as subsequent calls to 7401 // addImmOperands rely on correct number of MC operands. 7402 if (IsAtomicReturn && i == FirstOperandIdx) 7403 Op.addRegOperands(Inst, 1); 7404 continue; 7405 } 7406 7407 // Handle the case where soffset is an immediate 7408 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7409 Op.addImmOperands(Inst, 1); 7410 continue; 7411 } 7412 7413 // Handle tokens like 'offen' which are sometimes hard-coded into the 7414 // asm string. There are no MCInst operands for these. 
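// E.g. (illustrative) in 'buffer_load_dword v0, v[1:2], s[4:7], s1 idxen offen offset:16',
// 'idxen' and 'offen' reach this point as tokens and are simply skipped.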
7415 if (Op.isToken()) { 7416 continue; 7417 } 7418 assert(Op.isImm()); 7419 7420 // Handle optional arguments 7421 OptionalIdx[Op.getImmTy()] = i; 7422 } 7423 7424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7425 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7426 7427 if (!IsLds) { // tfe is not legal with lds opcodes 7428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7429 } 7430 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7431 } 7432 7433 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7434 OptionalImmIndexMap OptionalIdx; 7435 7436 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7437 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7438 7439 // Add the register arguments 7440 if (Op.isReg()) { 7441 Op.addRegOperands(Inst, 1); 7442 continue; 7443 } 7444 7445 // Handle the case where soffset is an immediate 7446 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7447 Op.addImmOperands(Inst, 1); 7448 continue; 7449 } 7450 7451 // Handle tokens like 'offen' which are sometimes hard-coded into the 7452 // asm string. There are no MCInst operands for these. 7453 if (Op.isToken()) { 7454 continue; 7455 } 7456 assert(Op.isImm()); 7457 7458 // Handle optional arguments 7459 OptionalIdx[Op.getImmTy()] = i; 7460 } 7461 7462 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7463 AMDGPUOperand::ImmTyOffset); 7464 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7465 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7466 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7468 } 7469 7470 //===----------------------------------------------------------------------===// 7471 // mimg 7472 //===----------------------------------------------------------------------===// 7473 7474 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7475 bool IsAtomic) { 7476 unsigned I = 1; 7477 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7478 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7479 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7480 } 7481 7482 if (IsAtomic) { 7483 // Add src, same as dst 7484 assert(Desc.getNumDefs() == 1); 7485 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7486 } 7487 7488 OptionalImmIndexMap OptionalIdx; 7489 7490 for (unsigned E = Operands.size(); I != E; ++I) { 7491 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7492 7493 // Add the register arguments 7494 if (Op.isReg()) { 7495 Op.addRegOperands(Inst, 1); 7496 } else if (Op.isImmModifier()) { 7497 OptionalIdx[Op.getImmTy()] = I; 7498 } else if (!Op.isToken()) { 7499 llvm_unreachable("unexpected operand type"); 7500 } 7501 } 7502 7503 bool IsGFX10Plus = isGFX10Plus(); 7504 7505 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7506 if (IsGFX10Plus) 7507 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7508 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7509 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7510 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7511 if (IsGFX10Plus) 7512 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7513 
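// Illustrative gfx10 example of a MIMG instruction carrying several of these
// optional operands: 'image_load v[0:3], v[4:5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D unorm'.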
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7514 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7515 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7516 if (!IsGFX10Plus) 7517 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7518 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7519 } 7520 7521 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7522 cvtMIMG(Inst, Operands, true); 7523 } 7524 7525 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7526 OptionalImmIndexMap OptionalIdx; 7527 bool IsAtomicReturn = false; 7528 7529 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7530 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7531 if (!Op.isCPol()) 7532 continue; 7533 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7534 break; 7535 } 7536 7537 if (!IsAtomicReturn) { 7538 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7539 if (NewOpc != -1) 7540 Inst.setOpcode(NewOpc); 7541 } 7542 7543 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7544 SIInstrFlags::IsAtomicRet; 7545 7546 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7547 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7548 7549 // Add the register arguments 7550 if (Op.isReg()) { 7551 Op.addRegOperands(Inst, 1); 7552 if (IsAtomicReturn && i == 1) 7553 Op.addRegOperands(Inst, 1); 7554 continue; 7555 } 7556 7557 // Handle the case where soffset is an immediate 7558 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7559 Op.addImmOperands(Inst, 1); 7560 continue; 7561 } 7562 7563 // Handle tokens like 'offen' which are sometimes hard-coded into the 7564 // asm string. There are no MCInst operands for these. 7565 if (Op.isToken()) { 7566 continue; 7567 } 7568 assert(Op.isImm()); 7569 7570 // Handle optional arguments 7571 OptionalIdx[Op.getImmTy()] = i; 7572 } 7573 7574 if ((int)Inst.getNumOperands() <= 7575 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7576 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7578 } 7579 7580 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7581 const OperandVector &Operands) { 7582 for (unsigned I = 1; I < Operands.size(); ++I) { 7583 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7584 if (Operand.isReg()) 7585 Operand.addRegOperands(Inst, 1); 7586 } 7587 7588 Inst.addOperand(MCOperand::createImm(1)); // a16 7589 } 7590 7591 //===----------------------------------------------------------------------===// 7592 // smrd 7593 //===----------------------------------------------------------------------===// 7594 7595 bool AMDGPUOperand::isSMRDOffset8() const { 7596 return isImm() && isUInt<8>(getImm()); 7597 } 7598 7599 bool AMDGPUOperand::isSMEMOffset() const { 7600 return isImm(); // Offset range is checked later by validator. 7601 } 7602 7603 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7604 // 32-bit literals are only supported on CI and we only want to use them 7605 // when the offset is > 8-bits. 
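// E.g. (illustrative) 's_load_dword s0, s[2:3], 0x1000' requires the 32-bit literal
// encoding on CI because 0x1000 does not fit into the 8-bit SMRD offset field.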
7606 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7607 } 7608 7609 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7610 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7611 } 7612 7613 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7614 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7615 } 7616 7617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7618 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7619 } 7620 7621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7622 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7623 } 7624 7625 //===----------------------------------------------------------------------===// 7626 // vop3 7627 //===----------------------------------------------------------------------===// 7628 7629 static bool ConvertOmodMul(int64_t &Mul) { 7630 if (Mul != 1 && Mul != 2 && Mul != 4) 7631 return false; 7632 7633 Mul >>= 1; 7634 return true; 7635 } 7636 7637 static bool ConvertOmodDiv(int64_t &Div) { 7638 if (Div == 1) { 7639 Div = 0; 7640 return true; 7641 } 7642 7643 if (Div == 2) { 7644 Div = 3; 7645 return true; 7646 } 7647 7648 return false; 7649 } 7650 7651 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7652 // This is intentional and ensures compatibility with sp3. 7653 // See bug 35397 for details. 7654 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7655 if (BoundCtrl == 0 || BoundCtrl == 1) { 7656 BoundCtrl = 1; 7657 return true; 7658 } 7659 return false; 7660 } 7661 7662 // Note: the order in this table matches the order of operands in AsmString. 7663 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7664 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7665 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7666 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7667 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7668 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7669 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7670 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7671 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7672 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7673 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7674 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7675 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7676 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7677 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7678 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7679 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7680 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7681 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7682 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7683 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7684 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7685 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7686 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7687 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7688 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7689 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7690 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7691 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7692 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7693 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7694 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7695 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7696 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7697 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7698 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7699 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7700 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7701 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7702 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7703 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7704 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7705 }; 7706 7707 void AMDGPUAsmParser::onBeginOfFile() { 7708 if (!getParser().getStreamer().getTargetStreamer() || 7709 getSTI().getTargetTriple().getArch() == Triple::r600) 7710 return; 7711 7712 if (!getTargetStreamer().getTargetID()) 7713 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7714 7715 if (isHsaAbiVersion3AndAbove(&getSTI())) 7716 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7717 } 7718 7719 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7720 7721 OperandMatchResultTy res = parseOptionalOpr(Operands); 7722 7723 // This is a hack to enable hardcoded mandatory operands which follow 7724 // optional operands. 7725 // 7726 // Current design assumes that all operands after the first optional operand 7727 // are also optional. However implementation of some instructions violates 7728 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7729 // 7730 // To alleviate this problem, we have to (implicitly) parse extra operands 7731 // to make sure autogenerated parser of custom operands never hit hardcoded 7732 // mandatory operands. 
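// For instance (illustrative), in 'flat_atomic_swap v0, v[1:2], v3 offset:16 glc' the
// hardcoded 'glc' follows the optional 'offset', so the lookahead loop below keeps
// consuming optional operands until the end of the statement is reached.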
7733 7734 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7735 if (res != MatchOperand_Success || 7736 isToken(AsmToken::EndOfStatement)) 7737 break; 7738 7739 trySkipToken(AsmToken::Comma); 7740 res = parseOptionalOpr(Operands); 7741 } 7742 7743 return res; 7744 } 7745 7746 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7747 OperandMatchResultTy res; 7748 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7749 // try to parse any optional operand here 7750 if (Op.IsBit) { 7751 res = parseNamedBit(Op.Name, Operands, Op.Type); 7752 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7753 res = parseOModOperand(Operands); 7754 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7755 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7756 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7757 res = parseSDWASel(Operands, Op.Name, Op.Type); 7758 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7759 res = parseSDWADstUnused(Operands); 7760 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7761 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7762 Op.Type == AMDGPUOperand::ImmTyNegLo || 7763 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7764 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7765 Op.ConvertResult); 7766 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7767 res = parseDim(Operands); 7768 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7769 res = parseCPol(Operands); 7770 } else { 7771 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7772 if (Op.Type == AMDGPUOperand::ImmTyBLGP && res == MatchOperand_NoMatch) { 7773 res = parseOperandArrayWithPrefix("neg", Operands, 7774 AMDGPUOperand::ImmTyBLGP, 7775 nullptr); 7776 } 7777 } 7778 if (res != MatchOperand_NoMatch) { 7779 return res; 7780 } 7781 } 7782 return MatchOperand_NoMatch; 7783 } 7784 7785 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7786 StringRef Name = getTokenStr(); 7787 if (Name == "mul") { 7788 return parseIntWithPrefix("mul", Operands, 7789 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7790 } 7791 7792 if (Name == "div") { 7793 return parseIntWithPrefix("div", Operands, 7794 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7795 } 7796 7797 return MatchOperand_NoMatch; 7798 } 7799 7800 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7801 cvtVOP3P(Inst, Operands); 7802 7803 int Opc = Inst.getOpcode(); 7804 7805 int SrcNum; 7806 const int Ops[] = { AMDGPU::OpName::src0, 7807 AMDGPU::OpName::src1, 7808 AMDGPU::OpName::src2 }; 7809 for (SrcNum = 0; 7810 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7811 ++SrcNum); 7812 assert(SrcNum > 0); 7813 7814 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7815 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7816 7817 if ((OpSel & (1 << SrcNum)) != 0) { 7818 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7819 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7820 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7821 } 7822 } 7823 7824 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7825 // 1. This operand is input modifiers 7826 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7827 // 2. This is not last operand 7828 && Desc.NumOperands > (OpNum + 1) 7829 // 3. Next operand is register class 7830 && Desc.OpInfo[OpNum + 1].RegClass != -1 7831 // 4. 
Next register is not tied to any other operand 7832 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7833 } 7834 7835 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7836 { 7837 OptionalImmIndexMap OptionalIdx; 7838 unsigned Opc = Inst.getOpcode(); 7839 7840 unsigned I = 1; 7841 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7842 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7843 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7844 } 7845 7846 for (unsigned E = Operands.size(); I != E; ++I) { 7847 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7848 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7849 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7850 } else if (Op.isInterpSlot() || 7851 Op.isInterpAttr() || 7852 Op.isAttrChan()) { 7853 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7854 } else if (Op.isImmModifier()) { 7855 OptionalIdx[Op.getImmTy()] = I; 7856 } else { 7857 llvm_unreachable("unhandled operand type"); 7858 } 7859 } 7860 7861 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7862 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7863 } 7864 7865 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7866 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7867 } 7868 7869 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7870 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7871 } 7872 } 7873 7874 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7875 OptionalImmIndexMap &OptionalIdx) { 7876 unsigned Opc = Inst.getOpcode(); 7877 7878 unsigned I = 1; 7879 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7880 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7881 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7882 } 7883 7884 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7885 // This instruction has src modifiers 7886 for (unsigned E = Operands.size(); I != E; ++I) { 7887 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7888 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7889 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7890 } else if (Op.isImmModifier()) { 7891 OptionalIdx[Op.getImmTy()] = I; 7892 } else if (Op.isRegOrImm()) { 7893 Op.addRegOrImmOperands(Inst, 1); 7894 } else { 7895 llvm_unreachable("unhandled operand type"); 7896 } 7897 } 7898 } else { 7899 // No src modifiers 7900 for (unsigned E = Operands.size(); I != E; ++I) { 7901 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7902 if (Op.isMod()) { 7903 OptionalIdx[Op.getImmTy()] = I; 7904 } else { 7905 Op.addRegOrImmOperands(Inst, 1); 7906 } 7907 } 7908 } 7909 7910 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7911 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7912 } 7913 7914 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7915 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7916 } 7917 7918 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7919 // it has src2 register operand that is tied to dst operand 7920 // we don't allow modifiers for this operand in assembler so src2_modifiers 7921 // should be 0. 
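// E.g. (illustrative) for 'v_mac_f32_e64 v0, v1, v2' the code below appends
// src2_modifiers = 0 and a src2 operand equal to the dst register (v0).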
7922 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7923 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7924 Opc == AMDGPU::V_MAC_F32_e64_vi || 7925 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7926 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7927 Opc == AMDGPU::V_MAC_F16_e64_vi || 7928 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7929 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7930 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7931 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7932 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7933 auto it = Inst.begin(); 7934 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7935 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7936 ++it; 7937 // Copy the operand to ensure it's not invalidated when Inst grows. 7938 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7939 } 7940 } 7941 7942 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7943 OptionalImmIndexMap OptionalIdx; 7944 cvtVOP3(Inst, Operands, OptionalIdx); 7945 } 7946 7947 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7948 OptionalImmIndexMap &OptIdx) { 7949 const int Opc = Inst.getOpcode(); 7950 const MCInstrDesc &Desc = MII.get(Opc); 7951 7952 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7953 7954 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7955 assert(!IsPacked); 7956 Inst.addOperand(Inst.getOperand(0)); 7957 } 7958 7959 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7960 // instruction, and then figure out where to actually put the modifiers 7961 7962 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7963 if (OpSelIdx != -1) { 7964 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7965 } 7966 7967 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7968 if (OpSelHiIdx != -1) { 7969 int DefaultVal = IsPacked ? 
-1 : 0; 7970 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7971 DefaultVal); 7972 } 7973 7974 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7975 if (NegLoIdx != -1) { 7976 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7977 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7978 } 7979 7980 const int Ops[] = { AMDGPU::OpName::src0, 7981 AMDGPU::OpName::src1, 7982 AMDGPU::OpName::src2 }; 7983 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7984 AMDGPU::OpName::src1_modifiers, 7985 AMDGPU::OpName::src2_modifiers }; 7986 7987 unsigned OpSel = 0; 7988 unsigned OpSelHi = 0; 7989 unsigned NegLo = 0; 7990 unsigned NegHi = 0; 7991 7992 if (OpSelIdx != -1) 7993 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7994 7995 if (OpSelHiIdx != -1) 7996 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7997 7998 if (NegLoIdx != -1) { 7999 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 8000 NegLo = Inst.getOperand(NegLoIdx).getImm(); 8001 NegHi = Inst.getOperand(NegHiIdx).getImm(); 8002 } 8003 8004 for (int J = 0; J < 3; ++J) { 8005 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 8006 if (OpIdx == -1) 8007 break; 8008 8009 uint32_t ModVal = 0; 8010 8011 if ((OpSel & (1 << J)) != 0) 8012 ModVal |= SISrcMods::OP_SEL_0; 8013 8014 if ((OpSelHi & (1 << J)) != 0) 8015 ModVal |= SISrcMods::OP_SEL_1; 8016 8017 if ((NegLo & (1 << J)) != 0) 8018 ModVal |= SISrcMods::NEG; 8019 8020 if ((NegHi & (1 << J)) != 0) 8021 ModVal |= SISrcMods::NEG_HI; 8022 8023 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 8024 8025 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 8026 } 8027 } 8028 8029 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 8030 OptionalImmIndexMap OptIdx; 8031 cvtVOP3(Inst, Operands, OptIdx); 8032 cvtVOP3P(Inst, Operands, OptIdx); 8033 } 8034 8035 //===----------------------------------------------------------------------===// 8036 // dpp 8037 //===----------------------------------------------------------------------===// 8038 8039 bool AMDGPUOperand::isDPP8() const { 8040 return isImmTy(ImmTyDPP8); 8041 } 8042 8043 bool AMDGPUOperand::isDPPCtrl() const { 8044 using namespace AMDGPU::DPP; 8045 8046 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 8047 if (result) { 8048 int64_t Imm = getImm(); 8049 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 8050 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 8051 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 8052 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 8053 (Imm == DppCtrl::WAVE_SHL1) || 8054 (Imm == DppCtrl::WAVE_ROL1) || 8055 (Imm == DppCtrl::WAVE_SHR1) || 8056 (Imm == DppCtrl::WAVE_ROR1) || 8057 (Imm == DppCtrl::ROW_MIRROR) || 8058 (Imm == DppCtrl::ROW_HALF_MIRROR) || 8059 (Imm == DppCtrl::BCAST15) || 8060 (Imm == DppCtrl::BCAST31) || 8061 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 8062 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 8063 } 8064 return false; 8065 } 8066 8067 //===----------------------------------------------------------------------===// 8068 // mAI 8069 //===----------------------------------------------------------------------===// 8070 8071 bool AMDGPUOperand::isBLGP() const { 8072 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 8073 } 8074 8075 bool 
AMDGPUOperand::isCBSZ() const { 8076 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 8077 } 8078 8079 bool AMDGPUOperand::isABID() const { 8080 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 8081 } 8082 8083 bool AMDGPUOperand::isS16Imm() const { 8084 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 8085 } 8086 8087 bool AMDGPUOperand::isU16Imm() const { 8088 return isImm() && isUInt<16>(getImm()); 8089 } 8090 8091 //===----------------------------------------------------------------------===// 8092 // dim 8093 //===----------------------------------------------------------------------===// 8094 8095 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 8096 // We want to allow "dim:1D" etc., 8097 // but the initial 1 is tokenized as an integer. 8098 std::string Token; 8099 if (isToken(AsmToken::Integer)) { 8100 SMLoc Loc = getToken().getEndLoc(); 8101 Token = std::string(getTokenStr()); 8102 lex(); 8103 if (getLoc() != Loc) 8104 return false; 8105 } 8106 8107 StringRef Suffix; 8108 if (!parseId(Suffix)) 8109 return false; 8110 Token += Suffix; 8111 8112 StringRef DimId = Token; 8113 if (DimId.startswith("SQ_RSRC_IMG_")) 8114 DimId = DimId.drop_front(12); 8115 8116 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 8117 if (!DimInfo) 8118 return false; 8119 8120 Encoding = DimInfo->Encoding; 8121 return true; 8122 } 8123 8124 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 8125 if (!isGFX10Plus()) 8126 return MatchOperand_NoMatch; 8127 8128 SMLoc S = getLoc(); 8129 8130 if (!trySkipId("dim", AsmToken::Colon)) 8131 return MatchOperand_NoMatch; 8132 8133 unsigned Encoding; 8134 SMLoc Loc = getLoc(); 8135 if (!parseDimId(Encoding)) { 8136 Error(Loc, "invalid dim value"); 8137 return MatchOperand_ParseFail; 8138 } 8139 8140 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 8141 AMDGPUOperand::ImmTyDim)); 8142 return MatchOperand_Success; 8143 } 8144 8145 //===----------------------------------------------------------------------===// 8146 // dpp 8147 //===----------------------------------------------------------------------===// 8148 8149 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 8150 SMLoc S = getLoc(); 8151 8152 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 8153 return MatchOperand_NoMatch; 8154 8155 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 8156 8157 int64_t Sels[8]; 8158 8159 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8160 return MatchOperand_ParseFail; 8161 8162 for (size_t i = 0; i < 8; ++i) { 8163 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8164 return MatchOperand_ParseFail; 8165 8166 SMLoc Loc = getLoc(); 8167 if (getParser().parseAbsoluteExpression(Sels[i])) 8168 return MatchOperand_ParseFail; 8169 if (0 > Sels[i] || 7 < Sels[i]) { 8170 Error(Loc, "expected a 3-bit value"); 8171 return MatchOperand_ParseFail; 8172 } 8173 } 8174 8175 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8176 return MatchOperand_ParseFail; 8177 8178 unsigned DPP8 = 0; 8179 for (size_t i = 0; i < 8; ++i) 8180 DPP8 |= (Sels[i] << (i * 3)); 8181 8182 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 8183 return MatchOperand_Success; 8184 } 8185 8186 bool 8187 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 8188 const OperandVector &Operands) { 8189 if (Ctrl == "row_newbcast") 8190 return isGFX90A(); 8191 8192 if (Ctrl == "row_share" || 8193 Ctrl 
== "row_xmask") 8194 return isGFX10Plus(); 8195 8196 if (Ctrl == "wave_shl" || 8197 Ctrl == "wave_shr" || 8198 Ctrl == "wave_rol" || 8199 Ctrl == "wave_ror" || 8200 Ctrl == "row_bcast") 8201 return isVI() || isGFX9(); 8202 8203 return Ctrl == "row_mirror" || 8204 Ctrl == "row_half_mirror" || 8205 Ctrl == "quad_perm" || 8206 Ctrl == "row_shl" || 8207 Ctrl == "row_shr" || 8208 Ctrl == "row_ror"; 8209 } 8210 8211 int64_t 8212 AMDGPUAsmParser::parseDPPCtrlPerm() { 8213 // quad_perm:[%d,%d,%d,%d] 8214 8215 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 8216 return -1; 8217 8218 int64_t Val = 0; 8219 for (int i = 0; i < 4; ++i) { 8220 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 8221 return -1; 8222 8223 int64_t Temp; 8224 SMLoc Loc = getLoc(); 8225 if (getParser().parseAbsoluteExpression(Temp)) 8226 return -1; 8227 if (Temp < 0 || Temp > 3) { 8228 Error(Loc, "expected a 2-bit value"); 8229 return -1; 8230 } 8231 8232 Val += (Temp << i * 2); 8233 } 8234 8235 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 8236 return -1; 8237 8238 return Val; 8239 } 8240 8241 int64_t 8242 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 8243 using namespace AMDGPU::DPP; 8244 8245 // sel:%d 8246 8247 int64_t Val; 8248 SMLoc Loc = getLoc(); 8249 8250 if (getParser().parseAbsoluteExpression(Val)) 8251 return -1; 8252 8253 struct DppCtrlCheck { 8254 int64_t Ctrl; 8255 int Lo; 8256 int Hi; 8257 }; 8258 8259 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 8260 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 8261 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 8262 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 8263 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 8264 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 8265 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 8266 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 8267 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 8268 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 8269 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 8270 .Default({-1, 0, 0}); 8271 8272 bool Valid; 8273 if (Check.Ctrl == -1) { 8274 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 8275 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 8276 } else { 8277 Valid = Check.Lo <= Val && Val <= Check.Hi; 8278 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 8279 } 8280 8281 if (!Valid) { 8282 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 8283 return -1; 8284 } 8285 8286 return Val; 8287 } 8288 8289 OperandMatchResultTy 8290 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 8291 using namespace AMDGPU::DPP; 8292 8293 if (!isToken(AsmToken::Identifier) || 8294 !isSupportedDPPCtrl(getTokenStr(), Operands)) 8295 return MatchOperand_NoMatch; 8296 8297 SMLoc S = getLoc(); 8298 int64_t Val = -1; 8299 StringRef Ctrl; 8300 8301 parseId(Ctrl); 8302 8303 if (Ctrl == "row_mirror") { 8304 Val = DppCtrl::ROW_MIRROR; 8305 } else if (Ctrl == "row_half_mirror") { 8306 Val = DppCtrl::ROW_HALF_MIRROR; 8307 } else { 8308 if (skipToken(AsmToken::Colon, "expected a colon")) { 8309 if (Ctrl == "quad_perm") { 8310 Val = parseDPPCtrlPerm(); 8311 } else { 8312 Val = parseDPPCtrlSel(Ctrl); 8313 } 8314 } 8315 } 8316 8317 if (Val == -1) 8318 return MatchOperand_ParseFail; 8319 8320 Operands.push_back( 8321 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8322 return MatchOperand_Success; 8323 } 8324 8325 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8326 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8327 } 8328 8329 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8330 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8331 } 8332 8333 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8334 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8335 } 8336 8337 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8338 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8339 } 8340 8341 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8342 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8343 } 8344 8345 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8346 OptionalImmIndexMap OptionalIdx; 8347 8348 unsigned Opc = Inst.getOpcode(); 8349 bool HasModifiers = 8350 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8351 unsigned I = 1; 8352 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8353 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8354 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8355 } 8356 8357 int Fi = 0; 8358 for (unsigned E = Operands.size(); I != E; ++I) { 8359 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8360 MCOI::TIED_TO); 8361 if (TiedTo != -1) { 8362 assert((unsigned)TiedTo < Inst.getNumOperands()); 8363 // handle tied old or src2 for MAC instructions 8364 Inst.addOperand(Inst.getOperand(TiedTo)); 8365 } 8366 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8367 // Add the register arguments 8368 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8369 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8370 // Skip it. 
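// E.g. (illustrative) in 'v_add_co_u32_dpp v0, vcc, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf'
// the written 'vcc' has no explicit slot in the DPP encoding, so it is dropped here.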
8371 continue; 8372 } 8373 8374 if (IsDPP8) { 8375 if (Op.isDPP8()) { 8376 Op.addImmOperands(Inst, 1); 8377 } else if (HasModifiers && 8378 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8379 Op.addRegWithFPInputModsOperands(Inst, 2); 8380 } else if (Op.isFI()) { 8381 Fi = Op.getImm(); 8382 } else if (Op.isReg()) { 8383 Op.addRegOperands(Inst, 1); 8384 } else { 8385 llvm_unreachable("Invalid operand type"); 8386 } 8387 } else { 8388 if (HasModifiers && 8389 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8390 Op.addRegWithFPInputModsOperands(Inst, 2); 8391 } else if (Op.isReg()) { 8392 Op.addRegOperands(Inst, 1); 8393 } else if (Op.isDPPCtrl()) { 8394 Op.addImmOperands(Inst, 1); 8395 } else if (Op.isImm()) { 8396 // Handle optional arguments 8397 OptionalIdx[Op.getImmTy()] = I; 8398 } else { 8399 llvm_unreachable("Invalid operand type"); 8400 } 8401 } 8402 } 8403 8404 if (IsDPP8) { 8405 using namespace llvm::AMDGPU::DPP; 8406 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8407 } else { 8408 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8410 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8411 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8412 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8413 } 8414 } 8415 } 8416 8417 //===----------------------------------------------------------------------===// 8418 // sdwa 8419 //===----------------------------------------------------------------------===// 8420 8421 OperandMatchResultTy 8422 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8423 AMDGPUOperand::ImmTy Type) { 8424 using namespace llvm::AMDGPU::SDWA; 8425 8426 SMLoc S = getLoc(); 8427 StringRef Value; 8428 OperandMatchResultTy res; 8429 8430 SMLoc StringLoc; 8431 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8432 if (res != MatchOperand_Success) { 8433 return res; 8434 } 8435 8436 int64_t Int; 8437 Int = StringSwitch<int64_t>(Value) 8438 .Case("BYTE_0", SdwaSel::BYTE_0) 8439 .Case("BYTE_1", SdwaSel::BYTE_1) 8440 .Case("BYTE_2", SdwaSel::BYTE_2) 8441 .Case("BYTE_3", SdwaSel::BYTE_3) 8442 .Case("WORD_0", SdwaSel::WORD_0) 8443 .Case("WORD_1", SdwaSel::WORD_1) 8444 .Case("DWORD", SdwaSel::DWORD) 8445 .Default(0xffffffff); 8446 8447 if (Int == 0xffffffff) { 8448 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8449 return MatchOperand_ParseFail; 8450 } 8451 8452 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8453 return MatchOperand_Success; 8454 } 8455 8456 OperandMatchResultTy 8457 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8458 using namespace llvm::AMDGPU::SDWA; 8459 8460 SMLoc S = getLoc(); 8461 StringRef Value; 8462 OperandMatchResultTy res; 8463 8464 SMLoc StringLoc; 8465 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8466 if (res != MatchOperand_Success) { 8467 return res; 8468 } 8469 8470 int64_t Int; 8471 Int = StringSwitch<int64_t>(Value) 8472 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8473 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8474 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8475 .Default(0xffffffff); 8476 8477 if (Int == 0xffffffff) { 8478 Error(StringLoc, "invalid dst_unused value"); 8479 return MatchOperand_ParseFail; 8480 } 8481 8482 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8483 return MatchOperand_Success; 8484 } 8485 8486 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8487 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8488 } 8489 8490 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8491 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8492 } 8493 8494 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8495 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8496 } 8497 8498 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8499 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8500 } 8501 8502 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8503 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8504 } 8505 8506 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8507 uint64_t BasicInstType, 8508 bool SkipDstVcc, 8509 bool SkipSrcVcc) { 8510 using namespace llvm::AMDGPU::SDWA; 8511 8512 OptionalImmIndexMap OptionalIdx; 8513 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8514 bool SkippedVcc = false; 8515 8516 unsigned I = 1; 8517 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8518 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8519 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8520 } 8521 8522 for (unsigned E = Operands.size(); I != E; ++I) { 8523 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8524 if (SkipVcc && !SkippedVcc && Op.isReg() && 8525 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8526 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8527 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8528 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8529 // Skip VCC only if we didn't skip it on previous iteration. 8530 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
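// Illustrative operand counts for 'v_addc_u32_sdwa v1, vcc, v2, v3, vcc ...':
// the dst-side vcc is seen while Inst holds only the dst (1 operand), and the
// carry-in vcc while Inst holds dst + src0 (mods, reg) + src1 (mods, reg) = 5 operands.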
8531 if (BasicInstType == SIInstrFlags::VOP2 && 8532 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8533 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8534 SkippedVcc = true; 8535 continue; 8536 } else if (BasicInstType == SIInstrFlags::VOPC && 8537 Inst.getNumOperands() == 0) { 8538 SkippedVcc = true; 8539 continue; 8540 } 8541 } 8542 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8543 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8544 } else if (Op.isImm()) { 8545 // Handle optional arguments 8546 OptionalIdx[Op.getImmTy()] = I; 8547 } else { 8548 llvm_unreachable("Invalid operand type"); 8549 } 8550 SkippedVcc = false; 8551 } 8552 8553 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8554 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8555 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8556 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8557 switch (BasicInstType) { 8558 case SIInstrFlags::VOP1: 8559 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8560 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8561 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8562 } 8563 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8564 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8565 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8566 break; 8567 8568 case SIInstrFlags::VOP2: 8569 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8570 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8571 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8572 } 8573 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8575 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8576 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8577 break; 8578 8579 case SIInstrFlags::VOPC: 8580 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8581 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8582 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8583 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8584 break; 8585 8586 default: 8587 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 8588 } 8589 } 8590 8591 // special case v_mac_{f16, f32}: 8592 // it has src2 register operand that is tied to dst operand 8593 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 8594 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 8595 auto it = Inst.begin(); 8596 std::advance( 8597 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 8598 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 8599 } 8600 } 8601 8602 //===----------------------------------------------------------------------===// 8603 // mAI 8604 //===----------------------------------------------------------------------===// 8605 8606 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 8607 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 8608 } 8609 8610 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 8611 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 8612 } 8613 8614 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 8615 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 8616 } 8617 8618 /// Force static initialization. 8619 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 8620 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 8621 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 8622 } 8623 8624 #define GET_REGISTER_MATCHER 8625 #define GET_MATCHER_IMPLEMENTATION 8626 #define GET_MNEMONIC_SPELL_CHECKER 8627 #define GET_MNEMONIC_CHECKER 8628 #include "AMDGPUGenAsmMatcher.inc" 8629 8630 // This function should be defined after auto-generated include so that we have 8631 // MatchClassKind enum defined 8632 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 8633 unsigned Kind) { 8634 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 8635 // But MatchInstructionImpl() expects to meet token and fails to validate 8636 // operand. This method checks if we are given immediate operand but expect to 8637 // get corresponding token. 8638 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8639 switch (Kind) { 8640 case MCK_addr64: 8641 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8642 case MCK_gds: 8643 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8644 case MCK_lds: 8645 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8646 case MCK_idxen: 8647 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8648 case MCK_offen: 8649 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8650 case MCK_SSrcB32: 8651 // When operands have expression values, they will return true for isToken, 8652 // because it is not possible to distinguish between a token and an 8653 // expression at parse time. MatchInstructionImpl() will always try to 8654 // match an operand as a token, when isToken returns true, and when the 8655 // name of the expression is not a valid token, the match will fail, 8656 // so we need to handle it here. 8657 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8658 case MCK_SSrcF32: 8659 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8660 case MCK_SoppBrTarget: 8661 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8662 case MCK_VReg32OrOff: 8663 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8664 case MCK_InterpSlot: 8665 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 8666 case MCK_Attr: 8667 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8668 case MCK_AttrChan: 8669 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8670 case MCK_ImmSMEMOffset: 8671 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8672 case MCK_SReg_64: 8673 case MCK_SReg_64_XEXEC: 8674 // Null is defined as a 32-bit register but 8675 // it should also be enabled with 64-bit operands. 8676 // The following code enables it for SReg_64 operands 8677 // used as source and destination. Remaining source 8678 // operands are handled in isInlinableImm. 8679 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8680 default: 8681 return Match_InvalidOperand; 8682 } 8683 } 8684 8685 //===----------------------------------------------------------------------===// 8686 // endpgm 8687 //===----------------------------------------------------------------------===// 8688 8689 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8690 SMLoc S = getLoc(); 8691 int64_t Imm = 0; 8692 8693 if (!parseExpr(Imm)) { 8694 // The operand is optional, if not present default to 0 8695 Imm = 0; 8696 } 8697 8698 if (!isUInt<16>(Imm)) { 8699 Error(S, "expected a 16-bit value"); 8700 return MatchOperand_ParseFail; 8701 } 8702 8703 Operands.push_back( 8704 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8705 return MatchOperand_Success; 8706 } 8707 8708 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8709
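// A quick way to exercise this parser manually (illustrative; assumes an LLVM build
// with the AMDGPU target enabled):
//   llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding <<< "v_add_f32 v0, v1, v2"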