1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/MC/TargetRegistry.h" 32 #include "llvm/Support/AMDGPUMetadata.h" 33 #include "llvm/Support/AMDHSAKernelDescriptor.h" 34 #include "llvm/Support/Casting.h" 35 #include "llvm/Support/MachineValueType.h" 36 #include "llvm/Support/TargetParser.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 88 return Operand; 89 } 90 91 int64_t getModifiersOperand() const { 92 assert(!(hasFPModifiers() && hasIntModifiers()) 93 && "fp and int modifiers should not be used simultaneously"); 94 if (hasFPModifiers()) { 95 return getFPModifiersOperand(); 96 } else if (hasIntModifiers()) { 97 return getIntModifiersOperand(); 98 } else { 99 return 0; 100 } 101 } 102 103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 104 }; 105 106 enum ImmTy { 107 ImmTyNone, 108 ImmTyGDS, 109 ImmTyLDS, 110 ImmTyOffen, 111 ImmTyIdxen, 112 ImmTyAddr64, 113 ImmTyOffset, 114 ImmTyInstOffset, 115 ImmTyOffset0, 116 ImmTyOffset1, 117 ImmTyCPol, 118 ImmTySWZ, 119 ImmTyTFE, 120 ImmTyD16, 121 ImmTyClampSI, 122 ImmTyOModSI, 123 ImmTyDPP8, 124 ImmTyDppCtrl, 125 ImmTyDppRowMask, 126 ImmTyDppBankMask, 127 ImmTyDppBoundCtrl, 128 ImmTyDppFi, 129 ImmTySdwaDstSel, 130 ImmTySdwaSrc0Sel, 131 ImmTySdwaSrc1Sel, 132 ImmTySdwaDstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTySwizzle, 155 ImmTyGprIdxMode, 156 ImmTyHigh, 157 ImmTyBLGP, 158 ImmTyCBSZ, 159 ImmTyABID, 160 ImmTyEndpgm, 161 }; 162 163 enum ImmKindTy { 164 ImmKindTyNone, 165 ImmKindTyLiteral, 166 ImmKindTyConst, 167 }; 168 169 private: 170 struct TokOp { 171 const char *Data; 172 unsigned Length; 173 }; 174 175 struct ImmOp { 176 int64_t Val; 177 ImmTy Type; 178 bool IsFPImm; 179 mutable ImmKindTy Kind; 180 Modifiers Mods; 181 }; 182 183 struct RegOp { 184 unsigned RegNo; 185 Modifiers Mods; 186 }; 187 188 union { 189 TokOp Tok; 190 ImmOp Imm; 191 RegOp Reg; 192 const MCExpr *Expr; 193 }; 194 195 public: 196 bool isToken() const override { 197 if (Kind == Token) 198 return true; 199 200 // When parsing operands, we can't always tell if something was meant to be 201 // a token, like 'gds', or an expression that references a global variable. 202 // In this case, we assume the string is an expression, and if we need to 203 // interpret it as a token, then we treat the symbol name as the token.
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrInline(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type); 251 } 252 253 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 254 return isRegOrInline(RCID, type) || isLiteralImm(type); 255 } 256 257 bool isRegOrImmWithInt16InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 259 } 260 261 bool isRegOrImmWithInt32InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 263 } 264 265 bool isRegOrImmWithInt64InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 267 } 268 269 bool isRegOrImmWithFP16InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 271 } 272 273 bool isRegOrImmWithFP32InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 275 } 276 277 bool isRegOrImmWithFP64InputMods() const { 278 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 279 } 280 281 bool isVReg() const { 282 return isRegClass(AMDGPU::VGPR_32RegClassID) || 283 isRegClass(AMDGPU::VReg_64RegClassID) || 284 isRegClass(AMDGPU::VReg_96RegClassID) || 285 isRegClass(AMDGPU::VReg_128RegClassID) || 286 isRegClass(AMDGPU::VReg_160RegClassID) || 287 isRegClass(AMDGPU::VReg_192RegClassID) || 288 isRegClass(AMDGPU::VReg_256RegClassID) || 289 isRegClass(AMDGPU::VReg_512RegClassID) || 290 isRegClass(AMDGPU::VReg_1024RegClassID); 291 } 292 293 bool isVReg32() const { 294 return isRegClass(AMDGPU::VGPR_32RegClassID); 295 } 296 297 bool isVReg32OrOff() const { 298 return isOff() || isVReg32(); 299 } 300 301 bool isNull() const { 302 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 303 } 304 305 bool isVRegWithInputMods() const; 306 307 bool isSDWAOperand(MVT type) const; 308 bool isSDWAFP16Operand() const; 309 bool isSDWAFP32Operand() const; 310 bool isSDWAInt16Operand() const; 311 bool isSDWAInt32Operand() const; 312 313 bool isImmTy(ImmTy ImmT) const { 314 return isImm() && Imm.Type == ImmT; 315 } 316 317 bool isImmModifier() const { 318 return isImm() && Imm.Type != ImmTyNone; 319 } 320 321 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 322 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 323 bool isDMask() const { return isImmTy(ImmTyDMask); } 324 bool isDim() const { return isImmTy(ImmTyDim); } 325 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 326 bool isDA() const { return isImmTy(ImmTyDA); } 327 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 328 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 329 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 330 bool isOff() const { return isImmTy(ImmTyOff); } 331 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 332 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 333 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 334 bool isOffen() const { return isImmTy(ImmTyOffen); } 335 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 336 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 337 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 338 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 339 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 340 341 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 342 bool isGDS() const { return isImmTy(ImmTyGDS); } 343 bool isLDS() const { return isImmTy(ImmTyLDS); } 344 bool isCPol() const { return isImmTy(ImmTyCPol); } 345 bool isSWZ() const { return isImmTy(ImmTySWZ); } 346 bool isTFE() const { return isImmTy(ImmTyTFE); } 347 bool isD16() const { return isImmTy(ImmTyD16); } 348 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 349 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 350 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 351 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 352 bool isFI() const { return isImmTy(ImmTyDppFi); } 353 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 354 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 355 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 356 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 357 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 358 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 359 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 360 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 361 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 362 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 363 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 364 bool isHigh() const { return isImmTy(ImmTyHigh); } 365 366 bool isMod() const { 367 return isClampSI() || isOModSI(); 368 } 369 370 bool isRegOrImm() const { 371 return isReg() || isImm(); 372 } 373 374 bool isRegClass(unsigned RCID) const; 375 376 bool isInlineValue() const; 377 378 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 379 return isRegOrInline(RCID, type) && !hasModifiers(); 380 } 381 382 bool isSCSrcB16() const { 383 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 384 } 385 386 bool isSCSrcV2B16() const { 387 return isSCSrcB16(); 388 } 389 390 bool isSCSrcB32() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 392 } 393 394 bool isSCSrcB64() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 396 } 397 398 bool isBoolReg() const; 399 400 bool isSCSrcF16() const { 401 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 402 } 403 404 bool isSCSrcV2F16() const { 405 return isSCSrcF16(); 406 } 407 408 bool isSCSrcF32() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 410 } 411 412 bool isSCSrcF64() const { 413 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 414 } 415 416 bool isSSrcB32() const { 417 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 418 } 419 420 bool isSSrcB16() const { 421 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 422 } 423 424 bool isSSrcV2B16() const { 425 llvm_unreachable("cannot happen"); 426 return isSSrcB16(); 427 } 428 429 bool isSSrcB64() const { 430 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 431 // See isVSrc64(). 432 return isSCSrcB64() || isLiteralImm(MVT::i64); 433 } 434 435 bool isSSrcF32() const { 436 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 437 } 438 439 bool isSSrcF64() const { 440 return isSCSrcB64() || isLiteralImm(MVT::f64); 441 } 442 443 bool isSSrcF16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::f16); 445 } 446 447 bool isSSrcV2F16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcF16(); 450 } 451 452 bool isSSrcV2FP32() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF32(); 455 } 456 457 bool isSCSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSCSrcF32(); 460 } 461 462 bool isSSrcV2INT32() const { 463 llvm_unreachable("cannot happen"); 464 return isSSrcB32(); 465 } 466 467 bool isSCSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSCSrcB32(); 470 } 471 472 bool isSSrcOrLdsB32() const { 473 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 474 isLiteralImm(MVT::i32) || isExpr(); 475 } 476 477 bool isVCSrcB32() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 479 } 480 481 bool isVCSrcB64() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 483 } 484 485 bool isVCSrcB16() const { 486 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 487 } 488 489 bool isVCSrcV2B16() const { 490 return isVCSrcB16(); 491 } 492 493 bool isVCSrcF32() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 495 } 496 497 bool isVCSrcF64() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 499 } 500 501 bool isVCSrcF16() const { 502 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 503 } 504 505 bool isVCSrcV2F16() const { 506 return isVCSrcF16(); 507 } 508 509 bool isVSrcB32() const { 510 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 511 } 512 513 bool isVSrcB64() const { 514 return isVCSrcF64() || isLiteralImm(MVT::i64); 515 } 516 517 bool isVSrcB16() const { 518 return isVCSrcB16() || isLiteralImm(MVT::i16); 519 } 520 521 bool isVSrcV2B16() const { 522 return isVSrcB16() || isLiteralImm(MVT::v2i16); 523 } 524 525 bool isVCSrcV2FP32() const { 526 return isVCSrcF64(); 527 } 528 529 bool isVSrcV2FP32() const { 530 return isVSrcF64() || isLiteralImm(MVT::v2f32); 531 } 532 533 bool isVCSrcV2INT32() const { 534 return isVCSrcB64(); 535 } 536 537 bool isVSrcV2INT32() const { 538 return isVSrcB64() || isLiteralImm(MVT::v2i32); 539 } 540 541 bool isVSrcF32() const { 542 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 543 } 544 545 bool isVSrcF64() const { 546 return isVCSrcF64() || isLiteralImm(MVT::f64); 547 } 548 549 bool isVSrcF16() const { 550 return isVCSrcF16() || isLiteralImm(MVT::f16); 551 } 552 553 bool isVSrcV2F16() const { 554 return isVSrcF16() || isLiteralImm(MVT::v2f16); 555 } 556 557 bool isVISrcB32() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 559 } 560 561 bool isVISrcB16() const { 562 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 563 } 564 565 bool isVISrcV2B16() const { 566 return isVISrcB16(); 567 } 568 569 bool isVISrcF32() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 571 } 572 573 
bool isVISrcF16() const { 574 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 575 } 576 577 bool isVISrcV2F16() const { 578 return isVISrcF16() || isVISrcB32(); 579 } 580 581 bool isVISrc_64B64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 583 } 584 585 bool isVISrc_64F64() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 587 } 588 589 bool isVISrc_64V2FP32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 591 } 592 593 bool isVISrc_64V2INT32() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 595 } 596 597 bool isVISrc_256B64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 599 } 600 601 bool isVISrc_256F64() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 603 } 604 605 bool isVISrc_128B16() const { 606 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 607 } 608 609 bool isVISrc_128V2B16() const { 610 return isVISrc_128B16(); 611 } 612 613 bool isVISrc_128B32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 615 } 616 617 bool isVISrc_128F32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2FP32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 623 } 624 625 bool isVISrc_256V2INT32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B32() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 631 } 632 633 bool isVISrc_512B16() const { 634 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 635 } 636 637 bool isVISrc_512V2B16() const { 638 return isVISrc_512B16(); 639 } 640 641 bool isVISrc_512F32() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 643 } 644 645 bool isVISrc_512F16() const { 646 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 647 } 648 649 bool isVISrc_512V2F16() const { 650 return isVISrc_512F16() || isVISrc_512B32(); 651 } 652 653 bool isVISrc_1024B32() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 655 } 656 657 bool isVISrc_1024B16() const { 658 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 659 } 660 661 bool isVISrc_1024V2B16() const { 662 return isVISrc_1024B16(); 663 } 664 665 bool isVISrc_1024F32() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 667 } 668 669 bool isVISrc_1024F16() const { 670 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 671 } 672 673 bool isVISrc_1024V2F16() const { 674 return isVISrc_1024F16() || isVISrc_1024B32(); 675 } 676 677 bool isAISrcB32() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 679 } 680 681 bool isAISrcB16() const { 682 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 683 } 684 685 bool isAISrcV2B16() const { 686 return isAISrcB16(); 687 } 688 689 bool isAISrcF32() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 691 } 692 693 bool isAISrcF16() const { 694 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 695 } 696 697 bool isAISrcV2F16() const { 698 return isAISrcF16() || isAISrcB32(); 699 } 700 701 bool isAISrc_64B64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 703 } 704 705 bool isAISrc_64F64() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 707 } 708 709 bool isAISrc_128B32() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 711 } 712 713 bool isAISrc_128B16() const { 714 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 715 } 716 717 bool isAISrc_128V2B16() const { 718 return isAISrc_128B16(); 719 } 720 721 bool isAISrc_128F32() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 723 } 724 725 bool isAISrc_128F16() const { 726 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 727 } 728 729 bool isAISrc_128V2F16() const { 730 return isAISrc_128F16() || isAISrc_128B32(); 731 } 732 733 bool isVISrc_128F16() const { 734 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 735 } 736 737 bool isVISrc_128V2F16() const { 738 return isVISrc_128F16() || isVISrc_128B32(); 739 } 740 741 bool isAISrc_256B64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 743 } 744 745 bool isAISrc_256F64() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 747 } 748 749 bool isAISrc_512B32() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 751 } 752 753 bool isAISrc_512B16() const { 754 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 755 } 756 757 bool isAISrc_512V2B16() const { 758 return isAISrc_512B16(); 759 } 760 761 bool isAISrc_512F32() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 763 } 764 765 bool isAISrc_512F16() const { 766 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 767 } 768 769 bool isAISrc_512V2F16() const { 770 return isAISrc_512F16() || isAISrc_512B32(); 771 } 772 773 bool isAISrc_1024B32() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 775 } 776 777 bool isAISrc_1024B16() const { 778 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 779 } 780 781 bool isAISrc_1024V2B16() const { 782 return isAISrc_1024B16(); 783 } 784 785 bool isAISrc_1024F32() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 787 } 788 789 bool isAISrc_1024F16() const { 790 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 791 } 792 793 bool isAISrc_1024V2F16() const { 794 return isAISrc_1024F16() || isAISrc_1024B32(); 795 } 796 797 bool isKImmFP32() const { 798 return isLiteralImm(MVT::f32); 799 } 800 801 bool isKImmFP16() const { 802 return isLiteralImm(MVT::f16); 803 } 804 805 bool isMem() const override { 806 return false; 807 } 808 809 bool isExpr() const { 810 return Kind == Expression; 811 } 812 813 bool isSoppBrTarget() const { 814 return isExpr() || isImm(); 815 } 816 817 bool isSWaitCnt() const; 818 bool isHwreg() const; 819 bool isSendMsg() const; 820 bool isSwizzle() const; 821 bool isSMRDOffset8() const; 822 bool isSMEMOffset() const; 823 bool isSMRDLiteralOffset() const; 824 bool isDPP8() const; 825 bool isDPPCtrl() const; 826 bool isBLGP() const; 827 bool isCBSZ() const; 828 bool isABID() const; 829 bool isGPRIdxMode() const; 830 bool isS16Imm() const; 831 bool isU16Imm() const; 832 bool isEndpgm() const; 833 834 StringRef getExpressionAsToken() const { 835 assert(isExpr()); 836 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 837 return S->getSymbol().getName(); 838 } 839 840 StringRef getToken() const { 841 assert(isToken()); 842 843 if (Kind == Expression) 844 return getExpressionAsToken(); 845 846 return StringRef(Tok.Data, Tok.Length); 
847 } 848 849 int64_t getImm() const { 850 assert(isImm()); 851 return Imm.Val; 852 } 853 854 void setImm(int64_t Val) { 855 assert(isImm()); 856 Imm.Val = Val; 857 } 858 859 ImmTy getImmTy() const { 860 assert(isImm()); 861 return Imm.Type; 862 } 863 864 unsigned getReg() const override { 865 assert(isRegKind()); 866 return Reg.RegNo; 867 } 868 869 SMLoc getStartLoc() const override { 870 return StartLoc; 871 } 872 873 SMLoc getEndLoc() const override { 874 return EndLoc; 875 } 876 877 SMRange getLocRange() const { 878 return SMRange(StartLoc, EndLoc); 879 } 880 881 Modifiers getModifiers() const { 882 assert(isRegKind() || isImmTy(ImmTyNone)); 883 return isRegKind() ? Reg.Mods : Imm.Mods; 884 } 885 886 void setModifiers(Modifiers Mods) { 887 assert(isRegKind() || isImmTy(ImmTyNone)); 888 if (isRegKind()) 889 Reg.Mods = Mods; 890 else 891 Imm.Mods = Mods; 892 } 893 894 bool hasModifiers() const { 895 return getModifiers().hasModifiers(); 896 } 897 898 bool hasFPModifiers() const { 899 return getModifiers().hasFPModifiers(); 900 } 901 902 bool hasIntModifiers() const { 903 return getModifiers().hasIntModifiers(); 904 } 905 906 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 907 908 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 909 910 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 911 912 template <unsigned Bitwidth> 913 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 914 915 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<16>(Inst, N); 917 } 918 919 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 920 addKImmFPOperands<32>(Inst, N); 921 } 922 923 void addRegOperands(MCInst &Inst, unsigned N) const; 924 925 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 926 addRegOperands(Inst, N); 927 } 928 929 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 930 if (isRegKind()) 931 addRegOperands(Inst, N); 932 else if (isExpr()) 933 Inst.addOperand(MCOperand::createExpr(Expr)); 934 else 935 addImmOperands(Inst, N); 936 } 937 938 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 939 Modifiers Mods = getModifiers(); 940 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 941 if (isRegKind()) { 942 addRegOperands(Inst, N); 943 } else { 944 addImmOperands(Inst, N, false); 945 } 946 } 947 948 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 949 assert(!hasIntModifiers()); 950 addRegOrImmWithInputModsOperands(Inst, N); 951 } 952 953 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 954 assert(!hasFPModifiers()); 955 addRegOrImmWithInputModsOperands(Inst, N); 956 } 957 958 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 959 Modifiers Mods = getModifiers(); 960 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 961 assert(isRegKind()); 962 addRegOperands(Inst, N); 963 } 964 965 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 966 assert(!hasIntModifiers()); 967 addRegWithInputModsOperands(Inst, N); 968 } 969 970 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 971 assert(!hasFPModifiers()); 972 addRegWithInputModsOperands(Inst, N); 973 } 974 975 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 976 if (isImm()) 977 addImmOperands(Inst, N); 978 else { 979 assert(isExpr()); 980 Inst.addOperand(MCOperand::createExpr(Expr)); 981 } 982 } 983 984 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 985 switch (Type) { 986 case ImmTyNone: OS << "None"; break; 987 case ImmTyGDS: OS << "GDS"; break; 988 case ImmTyLDS: OS << "LDS"; break; 989 case ImmTyOffen: OS << "Offen"; break; 990 case ImmTyIdxen: OS << "Idxen"; break; 991 case ImmTyAddr64: OS << "Addr64"; break; 992 case ImmTyOffset: OS << "Offset"; break; 993 case ImmTyInstOffset: OS << "InstOffset"; break; 994 case ImmTyOffset0: OS << "Offset0"; break; 995 case ImmTyOffset1: OS << "Offset1"; break; 996 case ImmTyCPol: OS << "CPol"; break; 997 case ImmTySWZ: OS << "SWZ"; break; 998 case ImmTyTFE: OS << "TFE"; break; 999 case ImmTyD16: OS << "D16"; break; 1000 case ImmTyFORMAT: OS << "FORMAT"; break; 1001 case ImmTyClampSI: OS << "ClampSI"; break; 1002 case ImmTyOModSI: OS << "OModSI"; break; 1003 case ImmTyDPP8: OS << "DPP8"; break; 1004 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1005 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1006 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1007 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1008 case ImmTyDppFi: OS << "FI"; break; 1009 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1010 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1011 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1012 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1013 case ImmTyDMask: OS << "DMask"; break; 1014 case ImmTyDim: OS << "Dim"; break; 1015 case ImmTyUNorm: OS << "UNorm"; break; 1016 case ImmTyDA: OS << "DA"; break; 1017 case ImmTyR128A16: OS << "R128A16"; break; 1018 case ImmTyA16: OS << "A16"; break; 1019 case ImmTyLWE: OS << "LWE"; break; 1020 case ImmTyOff: OS << "Off"; break; 1021 case ImmTyExpTgt: OS << "ExpTgt"; break; 1022 case ImmTyExpCompr: OS << "ExpCompr"; break; 1023 case ImmTyExpVM: OS << "ExpVM"; break; 1024 case ImmTyHwreg: OS << "Hwreg"; break; 1025 case ImmTySendMsg: OS << "SendMsg"; break; 1026 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1027 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1028 case ImmTyAttrChan: OS << "AttrChan"; break; 1029 case ImmTyOpSel: OS << "OpSel"; break; 1030 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1031 case ImmTyNegLo: OS << "NegLo"; break; 1032 case ImmTyNegHi: OS << "NegHi"; break; 1033 case ImmTySwizzle: OS << "Swizzle"; break; 1034 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1035 case ImmTyHigh: OS << "High"; break; 1036 case ImmTyBLGP: OS << "BLGP"; break; 1037 case ImmTyCBSZ: OS << "CBSZ"; break; 1038 case ImmTyABID: OS << "ABID"; break; 1039 case ImmTyEndpgm: OS << "Endpgm"; break; 1040 } 1041 } 1042 1043 void print(raw_ostream &OS) const override { 1044 switch (Kind) { 1045 case Register: 1046 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1047 break; 1048 case Immediate: 1049 OS << '<' << getImm(); 1050 if (getImmTy() != ImmTyNone) { 1051 OS << " type: "; printImmTy(OS, getImmTy()); 1052 } 1053 OS << " mods: " << Imm.Mods << '>'; 1054 break; 1055 case Token: 1056 OS << '\'' << getToken() << '\''; 1057 break; 1058 case Expression: 1059 OS << "<expr " << *Expr << '>'; 1060 break; 1061 } 1062 } 1063 1064 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1065 int64_t Val, SMLoc Loc, 1066 ImmTy Type = ImmTyNone, 1067 bool IsFPImm = false) { 1068 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1069 Op->Imm.Val = Val; 1070 Op->Imm.IsFPImm = IsFPImm; 1071 Op->Imm.Kind = ImmKindTyNone; 1072 Op->Imm.Type = Type; 1073 Op->Imm.Mods = Modifiers(); 1074 Op->StartLoc = Loc; 1075 Op->EndLoc = Loc; 1076 return Op; 1077 } 1078 1079 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1080 StringRef Str, SMLoc Loc, 1081 bool HasExplicitEncodingSize = true) { 1082 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1083 Res->Tok.Data = Str.data(); 1084 Res->Tok.Length = Str.size(); 1085 Res->StartLoc = Loc; 1086 Res->EndLoc = Loc; 1087 return Res; 1088 } 1089 1090 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1091 unsigned RegNo, SMLoc S, 1092 SMLoc E) { 1093 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1094 Op->Reg.RegNo = RegNo; 1095 Op->Reg.Mods = Modifiers(); 1096 Op->StartLoc = S; 1097 Op->EndLoc = E; 1098 return Op; 1099 } 1100 1101 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1102 const class MCExpr *Expr, SMLoc S) { 1103 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1104 Op->Expr = Expr; 1105 Op->StartLoc = S; 1106 Op->EndLoc = S; 1107 return Op; 1108 } 1109 }; 1110 1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1112 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1113 return OS; 1114 } 1115 1116 //===----------------------------------------------------------------------===// 1117 // AsmParser 1118 //===----------------------------------------------------------------------===// 1119 1120 // Holds info related to the current kernel, e.g. count of SGPRs used. 1121 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1122 // .amdgpu_hsa_kernel or at EOF. 1123 class KernelScopeInfo { 1124 int SgprIndexUnusedMin = -1; 1125 int VgprIndexUnusedMin = -1; 1126 MCContext *Ctx = nullptr; 1127 1128 void usesSgprAt(int i) { 1129 if (i >= SgprIndexUnusedMin) { 1130 SgprIndexUnusedMin = ++i; 1131 if (Ctx) { 1132 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1133 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1134 } 1135 } 1136 } 1137 1138 void usesVgprAt(int i) { 1139 if (i >= VgprIndexUnusedMin) { 1140 VgprIndexUnusedMin = ++i; 1141 if (Ctx) { 1142 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1143 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1144 } 1145 } 1146 } 1147 1148 public: 1149 KernelScopeInfo() = default; 1150 1151 void initialize(MCContext &Context) { 1152 Ctx = &Context; 1153 usesSgprAt(SgprIndexUnusedMin = -1); 1154 usesVgprAt(VgprIndexUnusedMin = -1); 1155 } 1156 1157 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1158 switch (RegKind) { 1159 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1160 case IS_AGPR: // fall through 1161 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1162 default: break; 1163 } 1164 } 1165 }; 1166 1167 class AMDGPUAsmParser : public MCTargetAsmParser { 1168 MCAsmParser &Parser; 1169 1170 // Number of extra operands parsed after the first optional operand. 1171 // This may be necessary to skip hardcoded mandatory operands. 
1172 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1173 1174 unsigned ForcedEncodingSize = 0; 1175 bool ForcedDPP = false; 1176 bool ForcedSDWA = false; 1177 KernelScopeInfo KernelScope; 1178 unsigned CPolSeen; 1179 1180 /// @name Auto-generated Match Functions 1181 /// { 1182 1183 #define GET_ASSEMBLER_HEADER 1184 #include "AMDGPUGenAsmMatcher.inc" 1185 1186 /// } 1187 1188 private: 1189 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1190 bool OutOfRangeError(SMRange Range); 1191 /// Calculate VGPR/SGPR blocks required for given target, reserved 1192 /// registers, and user-specified NextFreeXGPR values. 1193 /// 1194 /// \param Features [in] Target features, used for bug corrections. 1195 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1196 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1197 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1198 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1199 /// descriptor field, if valid. 1200 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1201 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1202 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1203 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1204 /// \param VGPRBlocks [out] Result VGPR block count. 1205 /// \param SGPRBlocks [out] Result SGPR block count. 1206 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1207 bool FlatScrUsed, bool XNACKUsed, 1208 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1209 SMRange VGPRRange, unsigned NextFreeSGPR, 1210 SMRange SGPRRange, unsigned &VGPRBlocks, 1211 unsigned &SGPRBlocks); 1212 bool ParseDirectiveAMDGCNTarget(); 1213 bool ParseDirectiveAMDHSAKernel(); 1214 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1215 bool ParseDirectiveHSACodeObjectVersion(); 1216 bool ParseDirectiveHSACodeObjectISA(); 1217 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1218 bool ParseDirectiveAMDKernelCodeT(); 1219 // TODO: Possibly make subtargetHasRegister const. 1220 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1221 bool ParseDirectiveAMDGPUHsaKernel(); 1222 1223 bool ParseDirectiveISAVersion(); 1224 bool ParseDirectiveHSAMetadata(); 1225 bool ParseDirectivePALMetadataBegin(); 1226 bool ParseDirectivePALMetadata(); 1227 bool ParseDirectiveAMDGPULDS(); 1228 1229 /// Common code to parse out a block of text (typically YAML) between start and 1230 /// end directives. 
1231 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1232 const char *AssemblerDirectiveEnd, 1233 std::string &CollectString); 1234 1235 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1236 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1237 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1238 unsigned &RegNum, unsigned &RegWidth, 1239 bool RestoreOnFailure = false); 1240 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1241 unsigned &RegNum, unsigned &RegWidth, 1242 SmallVectorImpl<AsmToken> &Tokens); 1243 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1244 unsigned &RegWidth, 1245 SmallVectorImpl<AsmToken> &Tokens); 1246 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1247 unsigned &RegWidth, 1248 SmallVectorImpl<AsmToken> &Tokens); 1249 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1250 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1251 bool ParseRegRange(unsigned& Num, unsigned& Width); 1252 unsigned getRegularReg(RegisterKind RegKind, 1253 unsigned RegNum, 1254 unsigned RegWidth, 1255 SMLoc Loc); 1256 1257 bool isRegister(); 1258 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1259 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1260 void initializeGprCountSymbol(RegisterKind RegKind); 1261 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1262 unsigned RegWidth); 1263 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1264 bool IsAtomic, bool IsLds = false); 1265 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1266 bool IsGdsHardcoded); 1267 1268 public: 1269 enum AMDGPUMatchResultTy { 1270 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1271 }; 1272 enum OperandMode { 1273 OperandMode_Default, 1274 OperandMode_NSA, 1275 }; 1276 1277 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1278 1279 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1280 const MCInstrInfo &MII, 1281 const MCTargetOptions &Options) 1282 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1283 MCAsmParserExtension::Initialize(Parser); 1284 1285 if (getFeatureBits().none()) { 1286 // Set default features. 1287 copySTI().ToggleFeature("southern-islands"); 1288 } 1289 1290 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1291 1292 { 1293 // TODO: make these pre-defined variables read-only. 1294 // Currently there is no suitable machinery in core llvm-mc for this. 1295 // MCSymbol::isRedefinable is intended for another purpose, and 1296 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
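// The code below pre-defines assembler symbols that expose the target's ISA
// version to the assembly source: .amdgcn.gfx_generation_{number,minor,stepping}
// when targeting the HSA v3/v4 ABI, or .option.machine_version_{major,minor,stepping}
// otherwise. Because they are created with setVariableValue(), they behave like
// ordinary symbols with constant values, so an assembly file should be able to
// test them in expressions; as a rough, unverified sketch:
//   .if .amdgcn.gfx_generation_number >= 10
//     ; code assembled only for GFX10 and later
//   .endif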
1297 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1298 MCContext &Ctx = getContext(); 1299 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1300 MCSymbol *Sym = 1301 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1303 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1304 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1305 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1307 } else { 1308 MCSymbol *Sym = 1309 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1313 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1314 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1315 } 1316 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1317 initializeGprCountSymbol(IS_VGPR); 1318 initializeGprCountSymbol(IS_SGPR); 1319 } else 1320 KernelScope.initialize(getContext()); 1321 } 1322 } 1323 1324 bool hasMIMG_R128() const { 1325 return AMDGPU::hasMIMG_R128(getSTI()); 1326 } 1327 1328 bool hasPackedD16() const { 1329 return AMDGPU::hasPackedD16(getSTI()); 1330 } 1331 1332 bool hasGFX10A16() const { 1333 return AMDGPU::hasGFX10A16(getSTI()); 1334 } 1335 1336 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1337 1338 bool isSI() const { 1339 return AMDGPU::isSI(getSTI()); 1340 } 1341 1342 bool isCI() const { 1343 return AMDGPU::isCI(getSTI()); 1344 } 1345 1346 bool isVI() const { 1347 return AMDGPU::isVI(getSTI()); 1348 } 1349 1350 bool isGFX9() const { 1351 return AMDGPU::isGFX9(getSTI()); 1352 } 1353 1354 bool isGFX90A() const { 1355 return AMDGPU::isGFX90A(getSTI()); 1356 } 1357 1358 bool isGFX9Plus() const { 1359 return AMDGPU::isGFX9Plus(getSTI()); 1360 } 1361 1362 bool isGFX10() const { 1363 return AMDGPU::isGFX10(getSTI()); 1364 } 1365 1366 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1367 1368 bool isGFX10_BEncoding() const { 1369 return AMDGPU::isGFX10_BEncoding(getSTI()); 1370 } 1371 1372 bool hasInv2PiInlineImm() const { 1373 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1374 } 1375 1376 bool hasFlatOffsets() const { 1377 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1378 } 1379 1380 bool hasArchitectedFlatScratch() const { 1381 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1382 } 1383 1384 bool hasSGPR102_SGPR103() const { 1385 return !isVI() && !isGFX9(); 1386 } 1387 1388 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1389 1390 bool hasIntClamp() const { 1391 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1392 } 1393 1394 AMDGPUTargetStreamer &getTargetStreamer() { 1395 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1396 return static_cast<AMDGPUTargetStreamer &>(TS); 1397 } 1398 1399 const MCRegisterInfo *getMRI() const { 1400 // We need this const_cast because for some reason getContext() is not const 1401 // in MCAsmParser. 
1402 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1403 } 1404 1405 const MCInstrInfo *getMII() const { 1406 return &MII; 1407 } 1408 1409 const FeatureBitset &getFeatureBits() const { 1410 return getSTI().getFeatureBits(); 1411 } 1412 1413 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1414 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1415 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1416 1417 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1418 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1419 bool isForcedDPP() const { return ForcedDPP; } 1420 bool isForcedSDWA() const { return ForcedSDWA; } 1421 ArrayRef<unsigned> getMatchedVariants() const; 1422 StringRef getMatchedVariantName() const; 1423 1424 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1425 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1426 bool RestoreOnFailure); 1427 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1428 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1429 SMLoc &EndLoc) override; 1430 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1431 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1432 unsigned Kind) override; 1433 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1434 OperandVector &Operands, MCStreamer &Out, 1435 uint64_t &ErrorInfo, 1436 bool MatchingInlineAsm) override; 1437 bool ParseDirective(AsmToken DirectiveID) override; 1438 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1439 OperandMode Mode = OperandMode_Default); 1440 StringRef parseMnemonicSuffix(StringRef Name); 1441 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1442 SMLoc NameLoc, OperandVector &Operands) override; 1443 //bool ProcessInstruction(MCInst &Inst); 1444 1445 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1446 1447 OperandMatchResultTy 1448 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1449 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1450 bool (*ConvertResult)(int64_t &) = nullptr); 1451 1452 OperandMatchResultTy 1453 parseOperandArrayWithPrefix(const char *Prefix, 1454 OperandVector &Operands, 1455 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1456 bool (*ConvertResult)(int64_t&) = nullptr); 1457 1458 OperandMatchResultTy 1459 parseNamedBit(StringRef Name, OperandVector &Operands, 1460 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1461 OperandMatchResultTy parseCPol(OperandVector &Operands); 1462 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1463 StringRef &Value, 1464 SMLoc &StringLoc); 1465 1466 bool isModifier(); 1467 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1468 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1469 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1470 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1471 bool parseSP3NegModifier(); 1472 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1473 OperandMatchResultTy parseReg(OperandVector &Operands); 1474 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1475 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1476 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1477 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1478 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1479 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1480 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1481 OperandMatchResultTy parseUfmt(int64_t &Format); 1482 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1483 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1484 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1485 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1486 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1487 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1488 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1489 1490 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1491 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1492 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1493 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1494 1495 bool parseCnt(int64_t &IntVal); 1496 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1497 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1498 1499 private: 1500 struct OperandInfoTy { 1501 SMLoc Loc; 1502 int64_t Id; 1503 bool IsSymbolic = false; 1504 bool IsDefined = false; 1505 1506 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1507 }; 1508 1509 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1510 bool validateSendMsg(const OperandInfoTy &Msg, 1511 const OperandInfoTy &Op, 1512 const OperandInfoTy &Stream); 1513 1514 bool parseHwregBody(OperandInfoTy &HwReg, 1515 OperandInfoTy &Offset, 1516 OperandInfoTy &Width); 1517 bool validateHwreg(const OperandInfoTy &HwReg, 1518 const OperandInfoTy &Offset, 1519 const OperandInfoTy &Width); 1520 1521 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1522 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1523 1524 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1525 const OperandVector &Operands) const; 1526 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1527 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1528 SMLoc getLitLoc(const OperandVector &Operands) const; 1529 SMLoc getConstLoc(const OperandVector &Operands) const; 1530 1531 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1532 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1534 bool validateSOPLiteral(const MCInst &Inst) const; 1535 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1536 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateIntClampSupported(const MCInst &Inst); 1538 bool validateMIMGAtomicDMask(const MCInst &Inst); 1539 bool validateMIMGGatherDMask(const MCInst &Inst); 1540 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1541 bool validateMIMGDataSize(const MCInst &Inst); 1542 bool validateMIMGAddrSize(const 
MCInst &Inst); 1543 bool validateMIMGD16(const MCInst &Inst); 1544 bool validateMIMGDim(const MCInst &Inst); 1545 bool validateMIMGMSAA(const MCInst &Inst); 1546 bool validateOpSel(const MCInst &Inst); 1547 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1548 bool validateVccOperand(unsigned Reg) const; 1549 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1551 bool validateAGPRLdSt(const MCInst &Inst) const; 1552 bool validateVGPRAlign(const MCInst &Inst) const; 1553 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1554 bool validateDivScale(const MCInst &Inst); 1555 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1556 const SMLoc &IDLoc); 1557 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1558 unsigned getConstantBusLimit(unsigned Opcode) const; 1559 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1560 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1561 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1562 1563 bool isSupportedMnemo(StringRef Mnemo, 1564 const FeatureBitset &FBS); 1565 bool isSupportedMnemo(StringRef Mnemo, 1566 const FeatureBitset &FBS, 1567 ArrayRef<unsigned> Variants); 1568 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1569 1570 bool isId(const StringRef Id) const; 1571 bool isId(const AsmToken &Token, const StringRef Id) const; 1572 bool isToken(const AsmToken::TokenKind Kind) const; 1573 bool trySkipId(const StringRef Id); 1574 bool trySkipId(const StringRef Pref, const StringRef Id); 1575 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1576 bool trySkipToken(const AsmToken::TokenKind Kind); 1577 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1578 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1579 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1580 1581 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1582 AsmToken::TokenKind getTokenKind() const; 1583 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1584 bool parseExpr(OperandVector &Operands); 1585 StringRef getTokenStr() const; 1586 AsmToken peekToken(); 1587 AsmToken getToken() const; 1588 SMLoc getLoc() const; 1589 void lex(); 1590 1591 public: 1592 void onBeginOfFile() override; 1593 1594 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1595 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1596 1597 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1598 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1599 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1600 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1601 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1602 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1603 1604 bool parseSwizzleOperand(int64_t &Op, 1605 const unsigned MinVal, 1606 const unsigned MaxVal, 1607 const StringRef ErrMsg, 1608 SMLoc &Loc); 1609 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1610 const unsigned MinVal, 1611 const unsigned MaxVal, 1612 const StringRef ErrMsg); 1613 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1614 bool parseSwizzleOffset(int64_t &Imm); 1615 bool parseSwizzleMacro(int64_t &Imm); 1616 bool parseSwizzleQuadPerm(int64_t &Imm); 1617 bool parseSwizzleBitmaskPerm(int64_t 
&Imm); 1618 bool parseSwizzleBroadcast(int64_t &Imm); 1619 bool parseSwizzleSwap(int64_t &Imm); 1620 bool parseSwizzleReverse(int64_t &Imm); 1621 1622 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1623 int64_t parseGPRIdxMacro(); 1624 1625 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1626 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1627 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1628 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1629 1630 AMDGPUOperand::Ptr defaultCPol() const; 1631 1632 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1633 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1634 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1635 AMDGPUOperand::Ptr defaultFlatOffset() const; 1636 1637 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1638 1639 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1640 OptionalImmIndexMap &OptionalIdx); 1641 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1642 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1643 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1644 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1645 OptionalImmIndexMap &OptionalIdx); 1646 1647 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1648 1649 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1650 bool IsAtomic = false); 1651 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1652 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1653 1654 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1655 1656 bool parseDimId(unsigned &Encoding); 1657 OperandMatchResultTy parseDim(OperandVector &Operands); 1658 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1659 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1660 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1661 int64_t parseDPPCtrlSel(StringRef Ctrl); 1662 int64_t parseDPPCtrlPerm(); 1663 AMDGPUOperand::Ptr defaultRowMask() const; 1664 AMDGPUOperand::Ptr defaultBankMask() const; 1665 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1666 AMDGPUOperand::Ptr defaultFI() const; 1667 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1668 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1669 1670 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1671 AMDGPUOperand::ImmTy Type); 1672 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1673 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1675 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1676 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1677 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1678 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1679 uint64_t BasicInstType, 1680 bool SkipDstVcc = false, 1681 bool SkipSrcVcc = false); 1682 1683 AMDGPUOperand::Ptr defaultBLGP() const; 1684 AMDGPUOperand::Ptr defaultCBSZ() const; 1685 AMDGPUOperand::Ptr defaultABID() const; 1686 1687 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1688 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1689 }; 1690 1691 struct OptionalOperand { 1692 
const char *Name; 1693 AMDGPUOperand::ImmTy Type; 1694 bool IsBit; 1695 bool (*ConvertResult)(int64_t&); 1696 }; 1697 1698 } // end anonymous namespace 1699 1700 // May be called with an integer type of equivalent bitwidth. 1701 static const fltSemantics *getFltSemantics(unsigned Size) { 1702 switch (Size) { 1703 case 4: 1704 return &APFloat::IEEEsingle(); 1705 case 8: 1706 return &APFloat::IEEEdouble(); 1707 case 2: 1708 return &APFloat::IEEEhalf(); 1709 default: 1710 llvm_unreachable("unsupported fp type"); 1711 } 1712 } 1713 1714 static const fltSemantics *getFltSemantics(MVT VT) { 1715 return getFltSemantics(VT.getSizeInBits() / 8); 1716 } 1717 1718 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1719 switch (OperandType) { 1720 case AMDGPU::OPERAND_REG_IMM_INT32: 1721 case AMDGPU::OPERAND_REG_IMM_FP32: 1722 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1727 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1728 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1729 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1730 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1731 case AMDGPU::OPERAND_KIMM32: 1732 return &APFloat::IEEEsingle(); 1733 case AMDGPU::OPERAND_REG_IMM_INT64: 1734 case AMDGPU::OPERAND_REG_IMM_FP64: 1735 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1736 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1737 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1738 return &APFloat::IEEEdouble(); 1739 case AMDGPU::OPERAND_REG_IMM_INT16: 1740 case AMDGPU::OPERAND_REG_IMM_FP16: 1741 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1742 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1743 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1744 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1745 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1746 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1747 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1748 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1749 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1750 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1751 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1752 case AMDGPU::OPERAND_KIMM16: 1753 return &APFloat::IEEEhalf(); 1754 default: 1755 llvm_unreachable("unsupported fp type"); 1756 } 1757 } 1758 1759 //===----------------------------------------------------------------------===// 1760 // Operand 1761 //===----------------------------------------------------------------------===// 1762 1763 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1764 bool Lost; 1765 1766 // Convert the literal to the operand's floating-point semantics. 1767 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1768 APFloat::rmNearestTiesToEven, 1769 &Lost); 1770 // We allow precision loss but not overflow or underflow. 1771 if (Status != APFloat::opOK && 1772 Lost && 1773 ((Status & APFloat::opOverflow) != 0 || 1774 (Status & APFloat::opUnderflow) != 0)) { 1775 return false; 1776 } 1777 1778 return true; 1779 } 1780 1781 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1782 return isUIntN(Size, Val) || isIntN(Size, Val); 1783 } 1784 1785 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1786 if (VT.getScalarType() == MVT::i16) { 1787 // FP immediate values are broken. 1788 return isInlinableIntLiteral(Val); 1789 } 1790 1791 // f16/v2f16 operands work correctly for all values.
1792 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1793 } 1794 1795 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1796 1797 // This is a hack to enable named inline values like 1798 // shared_base with both 32-bit and 64-bit operands. 1799 // Note that these values are defined as 1800 // 32-bit operands only. 1801 if (isInlineValue()) { 1802 return true; 1803 } 1804 1805 if (!isImmTy(ImmTyNone)) { 1806 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1807 return false; 1808 } 1809 // TODO: We should avoid using host float here. It would be better to 1810 // check the float bit values which is what a few other places do. 1811 // We've had bot failures before due to weird NaN support on mips hosts. 1812 1813 APInt Literal(64, Imm.Val); 1814 1815 if (Imm.IsFPImm) { // We got an fp literal token 1816 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1817 return AMDGPU::isInlinableLiteral64(Imm.Val, 1818 AsmParser->hasInv2PiInlineImm()); 1819 } 1820 1821 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1822 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1823 return false; 1824 1825 if (type.getScalarSizeInBits() == 16) { 1826 return isInlineableLiteralOp16( 1827 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1828 type, AsmParser->hasInv2PiInlineImm()); 1829 } 1830 1831 // Check if the single-precision literal is inlinable 1832 return AMDGPU::isInlinableLiteral32( 1833 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1834 AsmParser->hasInv2PiInlineImm()); 1835 } 1836 1837 // We got an int literal token. 1838 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1839 return AMDGPU::isInlinableLiteral64(Imm.Val, 1840 AsmParser->hasInv2PiInlineImm()); 1841 } 1842 1843 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1844 return false; 1845 } 1846 1847 if (type.getScalarSizeInBits() == 16) { 1848 return isInlineableLiteralOp16( 1849 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1850 type, AsmParser->hasInv2PiInlineImm()); 1851 } 1852 1853 return AMDGPU::isInlinableLiteral32( 1854 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1855 AsmParser->hasInv2PiInlineImm()); 1856 } 1857 1858 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1859 // Check that this immediate can be added as a literal 1860 if (!isImmTy(ImmTyNone)) { 1861 return false; 1862 } 1863 1864 if (!Imm.IsFPImm) { 1865 // We got an int literal token. 1866 1867 if (type == MVT::f64 && hasFPModifiers()) { 1868 // Cannot apply fp modifiers to int literals while preserving the same semantics 1869 // for VOP1/2/C and VOP3, because of integer truncation. To avoid ambiguity, 1870 // disable these cases. 1871 return false; 1872 } 1873 1874 unsigned Size = type.getSizeInBits(); 1875 if (Size == 64) 1876 Size = 32; 1877 1878 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1879 // types. 1880 return isSafeTruncation(Imm.Val, Size); 1881 } 1882 1883 // We got an fp literal token 1884 if (type == MVT::f64) { // Expected 64-bit fp operand 1885 // We would set the low 64 bits of the literal to zeroes, but we accept such literals 1886 return true; 1887 } 1888 1889 if (type == MVT::i64) { // Expected 64-bit int operand 1890 // We don't allow fp literals in 64-bit integer instructions. It is 1891 // unclear how we should encode them.
1892 return false; 1893 } 1894 1895 // We allow fp literals with f16x2 operands assuming that the specified 1896 // literal goes into the lower half and the upper half is zero. We also 1897 // require that the literal may be losslessly converted to f16. 1898 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1899 (type == MVT::v2i16)? MVT::i16 : 1900 (type == MVT::v2f32)? MVT::f32 : type; 1901 1902 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1903 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1904 } 1905 1906 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1907 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1908 } 1909 1910 bool AMDGPUOperand::isVRegWithInputMods() const { 1911 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1912 // GFX90A allows DPP on 64-bit operands. 1913 (isRegClass(AMDGPU::VReg_64RegClassID) && 1914 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1915 } 1916 1917 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1918 if (AsmParser->isVI()) 1919 return isVReg32(); 1920 else if (AsmParser->isGFX9Plus()) 1921 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1922 else 1923 return false; 1924 } 1925 1926 bool AMDGPUOperand::isSDWAFP16Operand() const { 1927 return isSDWAOperand(MVT::f16); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAFP32Operand() const { 1931 return isSDWAOperand(MVT::f32); 1932 } 1933 1934 bool AMDGPUOperand::isSDWAInt16Operand() const { 1935 return isSDWAOperand(MVT::i16); 1936 } 1937 1938 bool AMDGPUOperand::isSDWAInt32Operand() const { 1939 return isSDWAOperand(MVT::i32); 1940 } 1941 1942 bool AMDGPUOperand::isBoolReg() const { 1943 auto FB = AsmParser->getFeatureBits(); 1944 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1945 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1946 } 1947 1948 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1949 { 1950 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1951 assert(Size == 2 || Size == 4 || Size == 8); 1952 1953 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1954 1955 if (Imm.Mods.Abs) { 1956 Val &= ~FpSignMask; 1957 } 1958 if (Imm.Mods.Neg) { 1959 Val ^= FpSignMask; 1960 } 1961 1962 return Val; 1963 } 1964 1965 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1966 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1967 Inst.getNumOperands())) { 1968 addLiteralImmOperand(Inst, Imm.Val, 1969 ApplyModifiers & 1970 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1971 } else { 1972 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1973 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1974 setImmKindNone(); 1975 } 1976 } 1977 1978 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1979 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1980 auto OpNum = Inst.getNumOperands(); 1981 // Check that this operand accepts literals 1982 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1983 1984 if (ApplyModifiers) { 1985 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1986 const unsigned Size = Imm.IsFPImm ?
sizeof(double) : getOperandSize(InstDesc, OpNum); 1987 Val = applyInputFPModifiers(Val, Size); 1988 } 1989 1990 APInt Literal(64, Val); 1991 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1992 1993 if (Imm.IsFPImm) { // We got fp literal token 1994 switch (OpTy) { 1995 case AMDGPU::OPERAND_REG_IMM_INT64: 1996 case AMDGPU::OPERAND_REG_IMM_FP64: 1997 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1998 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1999 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2000 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2001 AsmParser->hasInv2PiInlineImm())) { 2002 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2003 setImmKindConst(); 2004 return; 2005 } 2006 2007 // Non-inlineable 2008 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2009 // For fp operands we check if low 32 bits are zeros 2010 if (Literal.getLoBits(32) != 0) { 2011 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2012 "Can't encode literal as exact 64-bit floating-point operand. " 2013 "Low 32-bits will be set to zero"); 2014 } 2015 2016 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2017 setImmKindLiteral(); 2018 return; 2019 } 2020 2021 // We don't allow fp literals in 64-bit integer instructions. It is 2022 // unclear how we should encode them. This case should be checked earlier 2023 // in predicate methods (isLiteralImm()) 2024 llvm_unreachable("fp literal in 64-bit integer instruction."); 2025 2026 case AMDGPU::OPERAND_REG_IMM_INT32: 2027 case AMDGPU::OPERAND_REG_IMM_FP32: 2028 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2029 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2030 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2031 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2033 case AMDGPU::OPERAND_REG_IMM_INT16: 2034 case AMDGPU::OPERAND_REG_IMM_FP16: 2035 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2036 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2037 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2038 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2039 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2040 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2041 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2042 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2043 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2044 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2045 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2046 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2047 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2048 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2049 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2050 case AMDGPU::OPERAND_KIMM32: 2051 case AMDGPU::OPERAND_KIMM16: { 2052 bool lost; 2053 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2054 // Convert literal to single precision 2055 FPLiteral.convert(*getOpFltSemantics(OpTy), 2056 APFloat::rmNearestTiesToEven, &lost); 2057 // We allow precision lost but not overflow or underflow. This should be 2058 // checked earlier in isLiteralImm() 2059 2060 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2061 Inst.addOperand(MCOperand::createImm(ImmVal)); 2062 setImmKindLiteral(); 2063 return; 2064 } 2065 default: 2066 llvm_unreachable("invalid operand size"); 2067 } 2068 2069 return; 2070 } 2071 2072 // We got int literal token. 2073 // Only sign extend inline immediates. 
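// For example, with a 32-bit operand type the integer token 64 is still in
// the inline range (-16..64) and is added unchanged as an inline constant,
// while 0x12345678 is not inlinable and is truncated to its low 32 bits and
// added as a literal.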
2074 switch (OpTy) { 2075 case AMDGPU::OPERAND_REG_IMM_INT32: 2076 case AMDGPU::OPERAND_REG_IMM_FP32: 2077 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2078 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2079 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2080 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2082 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2083 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2084 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2085 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2086 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2087 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2088 if (isSafeTruncation(Val, 32) && 2089 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2090 AsmParser->hasInv2PiInlineImm())) { 2091 Inst.addOperand(MCOperand::createImm(Val)); 2092 setImmKindConst(); 2093 return; 2094 } 2095 2096 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2097 setImmKindLiteral(); 2098 return; 2099 2100 case AMDGPU::OPERAND_REG_IMM_INT64: 2101 case AMDGPU::OPERAND_REG_IMM_FP64: 2102 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2103 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2104 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2105 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2106 Inst.addOperand(MCOperand::createImm(Val)); 2107 setImmKindConst(); 2108 return; 2109 } 2110 2111 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2112 setImmKindLiteral(); 2113 return; 2114 2115 case AMDGPU::OPERAND_REG_IMM_INT16: 2116 case AMDGPU::OPERAND_REG_IMM_FP16: 2117 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2118 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2119 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2120 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2122 if (isSafeTruncation(Val, 16) && 2123 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2124 AsmParser->hasInv2PiInlineImm())) { 2125 Inst.addOperand(MCOperand::createImm(Val)); 2126 setImmKindConst(); 2127 return; 2128 } 2129 2130 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2131 setImmKindLiteral(); 2132 return; 2133 2134 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2135 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2136 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2137 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2138 assert(isSafeTruncation(Val, 16)); 2139 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2140 AsmParser->hasInv2PiInlineImm())); 2141 2142 Inst.addOperand(MCOperand::createImm(Val)); 2143 return; 2144 } 2145 case AMDGPU::OPERAND_KIMM32: 2146 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2147 setImmKindNone(); 2148 return; 2149 case AMDGPU::OPERAND_KIMM16: 2150 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2151 setImmKindNone(); 2152 return; 2153 default: 2154 llvm_unreachable("invalid operand size"); 2155 } 2156 } 2157 2158 template <unsigned Bitwidth> 2159 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2160 APInt Literal(64, Imm.Val); 2161 setImmKindNone(); 2162 2163 if (!Imm.IsFPImm) { 2164 // We got int literal token. 
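// For example, with Bitwidth == 16 an integer token 0x12345 contributes only
// its low 16 bits (0x2345) to the KImm operand.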
2165 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2166 return; 2167 } 2168 2169 bool Lost; 2170 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2171 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2172 APFloat::rmNearestTiesToEven, &Lost); 2173 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2174 } 2175 2176 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2177 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2178 } 2179 2180 static bool isInlineValue(unsigned Reg) { 2181 switch (Reg) { 2182 case AMDGPU::SRC_SHARED_BASE: 2183 case AMDGPU::SRC_SHARED_LIMIT: 2184 case AMDGPU::SRC_PRIVATE_BASE: 2185 case AMDGPU::SRC_PRIVATE_LIMIT: 2186 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2187 return true; 2188 case AMDGPU::SRC_VCCZ: 2189 case AMDGPU::SRC_EXECZ: 2190 case AMDGPU::SRC_SCC: 2191 return true; 2192 case AMDGPU::SGPR_NULL: 2193 return true; 2194 default: 2195 return false; 2196 } 2197 } 2198 2199 bool AMDGPUOperand::isInlineValue() const { 2200 return isRegKind() && ::isInlineValue(getReg()); 2201 } 2202 2203 //===----------------------------------------------------------------------===// 2204 // AsmParser 2205 //===----------------------------------------------------------------------===// 2206 2207 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2208 if (Is == IS_VGPR) { 2209 switch (RegWidth) { 2210 default: return -1; 2211 case 1: return AMDGPU::VGPR_32RegClassID; 2212 case 2: return AMDGPU::VReg_64RegClassID; 2213 case 3: return AMDGPU::VReg_96RegClassID; 2214 case 4: return AMDGPU::VReg_128RegClassID; 2215 case 5: return AMDGPU::VReg_160RegClassID; 2216 case 6: return AMDGPU::VReg_192RegClassID; 2217 case 7: return AMDGPU::VReg_224RegClassID; 2218 case 8: return AMDGPU::VReg_256RegClassID; 2219 case 16: return AMDGPU::VReg_512RegClassID; 2220 case 32: return AMDGPU::VReg_1024RegClassID; 2221 } 2222 } else if (Is == IS_TTMP) { 2223 switch (RegWidth) { 2224 default: return -1; 2225 case 1: return AMDGPU::TTMP_32RegClassID; 2226 case 2: return AMDGPU::TTMP_64RegClassID; 2227 case 4: return AMDGPU::TTMP_128RegClassID; 2228 case 8: return AMDGPU::TTMP_256RegClassID; 2229 case 16: return AMDGPU::TTMP_512RegClassID; 2230 } 2231 } else if (Is == IS_SGPR) { 2232 switch (RegWidth) { 2233 default: return -1; 2234 case 1: return AMDGPU::SGPR_32RegClassID; 2235 case 2: return AMDGPU::SGPR_64RegClassID; 2236 case 3: return AMDGPU::SGPR_96RegClassID; 2237 case 4: return AMDGPU::SGPR_128RegClassID; 2238 case 5: return AMDGPU::SGPR_160RegClassID; 2239 case 6: return AMDGPU::SGPR_192RegClassID; 2240 case 7: return AMDGPU::SGPR_224RegClassID; 2241 case 8: return AMDGPU::SGPR_256RegClassID; 2242 case 16: return AMDGPU::SGPR_512RegClassID; 2243 } 2244 } else if (Is == IS_AGPR) { 2245 switch (RegWidth) { 2246 default: return -1; 2247 case 1: return AMDGPU::AGPR_32RegClassID; 2248 case 2: return AMDGPU::AReg_64RegClassID; 2249 case 3: return AMDGPU::AReg_96RegClassID; 2250 case 4: return AMDGPU::AReg_128RegClassID; 2251 case 5: return AMDGPU::AReg_160RegClassID; 2252 case 6: return AMDGPU::AReg_192RegClassID; 2253 case 7: return AMDGPU::AReg_224RegClassID; 2254 case 8: return AMDGPU::AReg_256RegClassID; 2255 case 16: return AMDGPU::AReg_512RegClassID; 2256 case 32: return AMDGPU::AReg_1024RegClassID; 2257 } 2258 } 2259 return -1; 2260 } 2261 2262 static unsigned getSpecialRegForName(StringRef RegName) { 2263 return StringSwitch<unsigned>(RegName) 2264 .Case("exec", 
AMDGPU::EXEC) 2265 .Case("vcc", AMDGPU::VCC) 2266 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2267 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2268 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2269 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2270 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2271 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2272 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2273 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2274 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2275 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2276 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2277 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2278 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2279 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2280 .Case("m0", AMDGPU::M0) 2281 .Case("vccz", AMDGPU::SRC_VCCZ) 2282 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2283 .Case("execz", AMDGPU::SRC_EXECZ) 2284 .Case("src_execz", AMDGPU::SRC_EXECZ) 2285 .Case("scc", AMDGPU::SRC_SCC) 2286 .Case("src_scc", AMDGPU::SRC_SCC) 2287 .Case("tba", AMDGPU::TBA) 2288 .Case("tma", AMDGPU::TMA) 2289 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2290 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2291 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2292 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2293 .Case("vcc_lo", AMDGPU::VCC_LO) 2294 .Case("vcc_hi", AMDGPU::VCC_HI) 2295 .Case("exec_lo", AMDGPU::EXEC_LO) 2296 .Case("exec_hi", AMDGPU::EXEC_HI) 2297 .Case("tma_lo", AMDGPU::TMA_LO) 2298 .Case("tma_hi", AMDGPU::TMA_HI) 2299 .Case("tba_lo", AMDGPU::TBA_LO) 2300 .Case("tba_hi", AMDGPU::TBA_HI) 2301 .Case("pc", AMDGPU::PC_REG) 2302 .Case("null", AMDGPU::SGPR_NULL) 2303 .Default(AMDGPU::NoRegister); 2304 } 2305 2306 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2307 SMLoc &EndLoc, bool RestoreOnFailure) { 2308 auto R = parseRegister(); 2309 if (!R) return true; 2310 assert(R->isReg()); 2311 RegNo = R->getReg(); 2312 StartLoc = R->getStartLoc(); 2313 EndLoc = R->getEndLoc(); 2314 return false; 2315 } 2316 2317 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2318 SMLoc &EndLoc) { 2319 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2320 } 2321 2322 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2323 SMLoc &StartLoc, 2324 SMLoc &EndLoc) { 2325 bool Result = 2326 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2327 bool PendingErrors = getParser().hasPendingError(); 2328 getParser().clearPendingErrors(); 2329 if (PendingErrors) 2330 return MatchOperand_ParseFail; 2331 if (Result) 2332 return MatchOperand_NoMatch; 2333 return MatchOperand_Success; 2334 } 2335 2336 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2337 RegisterKind RegKind, unsigned Reg1, 2338 SMLoc Loc) { 2339 switch (RegKind) { 2340 case IS_SPECIAL: 2341 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2342 Reg = AMDGPU::EXEC; 2343 RegWidth = 2; 2344 return true; 2345 } 2346 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2347 Reg = AMDGPU::FLAT_SCR; 2348 RegWidth = 2; 2349 return true; 2350 } 2351 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2352 Reg = AMDGPU::XNACK_MASK; 2353 RegWidth = 2; 2354 return true; 2355 } 2356 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2357 Reg = AMDGPU::VCC; 2358 RegWidth = 2; 2359 return true; 2360 } 2361 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2362 Reg = 
AMDGPU::TBA; 2363 RegWidth = 2; 2364 return true; 2365 } 2366 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2367 Reg = AMDGPU::TMA; 2368 RegWidth = 2; 2369 return true; 2370 } 2371 Error(Loc, "register does not fit in the list"); 2372 return false; 2373 case IS_VGPR: 2374 case IS_SGPR: 2375 case IS_AGPR: 2376 case IS_TTMP: 2377 if (Reg1 != Reg + RegWidth) { 2378 Error(Loc, "registers in a list must have consecutive indices"); 2379 return false; 2380 } 2381 RegWidth++; 2382 return true; 2383 default: 2384 llvm_unreachable("unexpected register kind"); 2385 } 2386 } 2387 2388 struct RegInfo { 2389 StringLiteral Name; 2390 RegisterKind Kind; 2391 }; 2392 2393 static constexpr RegInfo RegularRegisters[] = { 2394 {{"v"}, IS_VGPR}, 2395 {{"s"}, IS_SGPR}, 2396 {{"ttmp"}, IS_TTMP}, 2397 {{"acc"}, IS_AGPR}, 2398 {{"a"}, IS_AGPR}, 2399 }; 2400 2401 static bool isRegularReg(RegisterKind Kind) { 2402 return Kind == IS_VGPR || 2403 Kind == IS_SGPR || 2404 Kind == IS_TTMP || 2405 Kind == IS_AGPR; 2406 } 2407 2408 static const RegInfo* getRegularRegInfo(StringRef Str) { 2409 for (const RegInfo &Reg : RegularRegisters) 2410 if (Str.startswith(Reg.Name)) 2411 return &Reg; 2412 return nullptr; 2413 } 2414 2415 static bool getRegNum(StringRef Str, unsigned& Num) { 2416 return !Str.getAsInteger(10, Num); 2417 } 2418 2419 bool 2420 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2421 const AsmToken &NextToken) const { 2422 2423 // A list of consecutive registers: [s0,s1,s2,s3] 2424 if (Token.is(AsmToken::LBrac)) 2425 return true; 2426 2427 if (!Token.is(AsmToken::Identifier)) 2428 return false; 2429 2430 // A single register like s0 or a range of registers like s[0:1] 2431 2432 StringRef Str = Token.getString(); 2433 const RegInfo *Reg = getRegularRegInfo(Str); 2434 if (Reg) { 2435 StringRef RegName = Reg->Name; 2436 StringRef RegSuffix = Str.substr(RegName.size()); 2437 if (!RegSuffix.empty()) { 2438 unsigned Num; 2439 // A single register with an index: rXX 2440 if (getRegNum(RegSuffix, Num)) 2441 return true; 2442 } else { 2443 // A range of registers: r[XX:YY]. 2444 if (NextToken.is(AsmToken::LBrac)) 2445 return true; 2446 } 2447 } 2448 2449 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2450 } 2451 2452 bool 2453 AMDGPUAsmParser::isRegister() 2454 { 2455 return isRegister(getToken(), peekToken()); 2456 } 2457 2458 unsigned 2459 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2460 unsigned RegNum, 2461 unsigned RegWidth, 2462 SMLoc Loc) { 2463 2464 assert(isRegularReg(RegKind)); 2465 2466 unsigned AlignSize = 1; 2467 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2468 // SGPR and TTMP registers must be aligned. 2469 // Max required alignment is 4 dwords. 
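// For example, s[2:3] is accepted (a 2-dword range starting at an even
// index), while s[1:2] fails the alignment check below and is reported as
// "invalid register alignment".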
2470 AlignSize = std::min(RegWidth, 4u); 2471 } 2472 2473 if (RegNum % AlignSize != 0) { 2474 Error(Loc, "invalid register alignment"); 2475 return AMDGPU::NoRegister; 2476 } 2477 2478 unsigned RegIdx = RegNum / AlignSize; 2479 int RCID = getRegClass(RegKind, RegWidth); 2480 if (RCID == -1) { 2481 Error(Loc, "invalid or unsupported register size"); 2482 return AMDGPU::NoRegister; 2483 } 2484 2485 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2486 const MCRegisterClass RC = TRI->getRegClass(RCID); 2487 if (RegIdx >= RC.getNumRegs()) { 2488 Error(Loc, "register index is out of range"); 2489 return AMDGPU::NoRegister; 2490 } 2491 2492 return RC.getRegister(RegIdx); 2493 } 2494 2495 bool 2496 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2497 int64_t RegLo, RegHi; 2498 if (!skipToken(AsmToken::LBrac, "missing register index")) 2499 return false; 2500 2501 SMLoc FirstIdxLoc = getLoc(); 2502 SMLoc SecondIdxLoc; 2503 2504 if (!parseExpr(RegLo)) 2505 return false; 2506 2507 if (trySkipToken(AsmToken::Colon)) { 2508 SecondIdxLoc = getLoc(); 2509 if (!parseExpr(RegHi)) 2510 return false; 2511 } else { 2512 RegHi = RegLo; 2513 } 2514 2515 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2516 return false; 2517 2518 if (!isUInt<32>(RegLo)) { 2519 Error(FirstIdxLoc, "invalid register index"); 2520 return false; 2521 } 2522 2523 if (!isUInt<32>(RegHi)) { 2524 Error(SecondIdxLoc, "invalid register index"); 2525 return false; 2526 } 2527 2528 if (RegLo > RegHi) { 2529 Error(FirstIdxLoc, "first register index should not exceed second index"); 2530 return false; 2531 } 2532 2533 Num = static_cast<unsigned>(RegLo); 2534 Width = (RegHi - RegLo) + 1; 2535 return true; 2536 } 2537 2538 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2539 unsigned &RegNum, unsigned &RegWidth, 2540 SmallVectorImpl<AsmToken> &Tokens) { 2541 assert(isToken(AsmToken::Identifier)); 2542 unsigned Reg = getSpecialRegForName(getTokenStr()); 2543 if (Reg) { 2544 RegNum = 0; 2545 RegWidth = 1; 2546 RegKind = IS_SPECIAL; 2547 Tokens.push_back(getToken()); 2548 lex(); // skip register name 2549 } 2550 return Reg; 2551 } 2552 2553 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2554 unsigned &RegNum, unsigned &RegWidth, 2555 SmallVectorImpl<AsmToken> &Tokens) { 2556 assert(isToken(AsmToken::Identifier)); 2557 StringRef RegName = getTokenStr(); 2558 auto Loc = getLoc(); 2559 2560 const RegInfo *RI = getRegularRegInfo(RegName); 2561 if (!RI) { 2562 Error(Loc, "invalid register name"); 2563 return AMDGPU::NoRegister; 2564 } 2565 2566 Tokens.push_back(getToken()); 2567 lex(); // skip register name 2568 2569 RegKind = RI->Kind; 2570 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2571 if (!RegSuffix.empty()) { 2572 // Single 32-bit register: vXX. 2573 if (!getRegNum(RegSuffix, RegNum)) { 2574 Error(Loc, "invalid register index"); 2575 return AMDGPU::NoRegister; 2576 } 2577 RegWidth = 1; 2578 } else { 2579 // Range of registers: v[XX:YY]. ":YY" is optional. 
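// For example, "v[4:7]" yields RegNum = 4 and RegWidth = 4, while "v[8]"
// (no colon) yields RegNum = 8 and RegWidth = 1.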
2580 if (!ParseRegRange(RegNum, RegWidth)) 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2585 } 2586 2587 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2588 unsigned &RegWidth, 2589 SmallVectorImpl<AsmToken> &Tokens) { 2590 unsigned Reg = AMDGPU::NoRegister; 2591 auto ListLoc = getLoc(); 2592 2593 if (!skipToken(AsmToken::LBrac, 2594 "expected a register or a list of registers")) { 2595 return AMDGPU::NoRegister; 2596 } 2597 2598 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2599 2600 auto Loc = getLoc(); 2601 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2602 return AMDGPU::NoRegister; 2603 if (RegWidth != 1) { 2604 Error(Loc, "expected a single 32-bit register"); 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 for (; trySkipToken(AsmToken::Comma); ) { 2609 RegisterKind NextRegKind; 2610 unsigned NextReg, NextRegNum, NextRegWidth; 2611 Loc = getLoc(); 2612 2613 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2614 NextRegNum, NextRegWidth, 2615 Tokens)) { 2616 return AMDGPU::NoRegister; 2617 } 2618 if (NextRegWidth != 1) { 2619 Error(Loc, "expected a single 32-bit register"); 2620 return AMDGPU::NoRegister; 2621 } 2622 if (NextRegKind != RegKind) { 2623 Error(Loc, "registers in a list must be of the same kind"); 2624 return AMDGPU::NoRegister; 2625 } 2626 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2627 return AMDGPU::NoRegister; 2628 } 2629 2630 if (!skipToken(AsmToken::RBrac, 2631 "expected a comma or a closing square bracket")) { 2632 return AMDGPU::NoRegister; 2633 } 2634 2635 if (isRegularReg(RegKind)) 2636 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2637 2638 return Reg; 2639 } 2640 2641 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2642 unsigned &RegNum, unsigned &RegWidth, 2643 SmallVectorImpl<AsmToken> &Tokens) { 2644 auto Loc = getLoc(); 2645 Reg = AMDGPU::NoRegister; 2646 2647 if (isToken(AsmToken::Identifier)) { 2648 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2649 if (Reg == AMDGPU::NoRegister) 2650 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2651 } else { 2652 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2653 } 2654 2655 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2656 if (Reg == AMDGPU::NoRegister) { 2657 assert(Parser.hasPendingError()); 2658 return false; 2659 } 2660 2661 if (!subtargetHasRegister(*TRI, Reg)) { 2662 if (Reg == AMDGPU::SGPR_NULL) { 2663 Error(Loc, "'null' operand is not supported on this GPU"); 2664 } else { 2665 Error(Loc, "register not available on this GPU"); 2666 } 2667 return false; 2668 } 2669 2670 return true; 2671 } 2672 2673 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2674 unsigned &RegNum, unsigned &RegWidth, 2675 bool RestoreOnFailure /*=false*/) { 2676 Reg = AMDGPU::NoRegister; 2677 2678 SmallVector<AsmToken, 1> Tokens; 2679 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2680 if (RestoreOnFailure) { 2681 while (!Tokens.empty()) { 2682 getLexer().UnLex(Tokens.pop_back_val()); 2683 } 2684 } 2685 return true; 2686 } 2687 return false; 2688 } 2689 2690 Optional<StringRef> 2691 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2692 switch (RegKind) { 2693 case IS_VGPR: 2694 return StringRef(".amdgcn.next_free_vgpr"); 2695 case IS_SGPR: 2696 return StringRef(".amdgcn.next_free_sgpr"); 2697 default: 2698 return None; 2699 } 2700 } 2701 2702 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2703 auto SymbolName = getGprCountSymbolName(RegKind); 2704 assert(SymbolName && "initializing invalid register kind"); 2705 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2706 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2707 } 2708 2709 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2710 unsigned DwordRegIndex, 2711 unsigned RegWidth) { 2712 // Symbols are only defined for GCN targets 2713 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2714 return true; 2715 2716 auto SymbolName = getGprCountSymbolName(RegKind); 2717 if (!SymbolName) 2718 return true; 2719 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2720 2721 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2722 int64_t OldCount; 2723 2724 if (!Sym->isVariable()) 2725 return !Error(getLoc(), 2726 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2727 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2728 return !Error( 2729 getLoc(), 2730 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2731 2732 if (OldCount <= NewMax) 2733 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2734 2735 return true; 2736 } 2737 2738 std::unique_ptr<AMDGPUOperand> 2739 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2740 const auto &Tok = getToken(); 2741 SMLoc StartLoc = Tok.getLoc(); 2742 SMLoc EndLoc = Tok.getEndLoc(); 2743 RegisterKind RegKind; 2744 unsigned Reg, RegNum, RegWidth; 2745 2746 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2747 return nullptr; 2748 } 2749 if (isHsaAbiVersion3Or4(&getSTI())) { 2750 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2751 return nullptr; 2752 } else 2753 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2754 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2755 } 2756 2757 OperandMatchResultTy 2758 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2759 // TODO: add syntactic sugar for 1/(2*PI) 2760 2761 assert(!isRegister()); 2762 assert(!isModifier()); 2763 2764 const auto& Tok = getToken(); 2765 const auto& NextTok = peekToken(); 2766 bool IsReal = Tok.is(AsmToken::Real); 2767 SMLoc S = getLoc(); 2768 bool Negate = false; 2769 2770 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2771 lex(); 2772 IsReal = true; 2773 Negate = true; 2774 } 2775 2776 if (IsReal) { 2777 // Floating-point expressions are not supported. 2778 // Can only allow floating-point literals with an 2779 // optional sign. 2780 2781 StringRef Num = getTokenStr(); 2782 lex(); 2783 2784 APFloat RealVal(APFloat::IEEEdouble()); 2785 auto roundMode = APFloat::rmNearestTiesToEven; 2786 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2787 return MatchOperand_ParseFail; 2788 } 2789 if (Negate) 2790 RealVal.changeSign(); 2791 2792 Operands.push_back( 2793 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2794 AMDGPUOperand::ImmTyNone, true)); 2795 2796 return MatchOperand_Success; 2797 2798 } else { 2799 int64_t IntVal; 2800 const MCExpr *Expr; 2801 SMLoc S = getLoc(); 2802 2803 if (HasSP3AbsModifier) { 2804 // This is a workaround for handling expressions 2805 // as arguments of SP3 'abs' modifier, for example: 2806 // |1.0| 2807 // |-1| 2808 // |1+x| 2809 // This syntax is not compatible with syntax of standard 2810 // MC expressions (due to the trailing '|'). 
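// Using parsePrimaryExpr() below keeps the trailing '|' from being taken as
// the binary OR operator: for an input like |-1| only the primary expression
// "-1" is parsed here and the closing '|' is left for the caller to consume.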
2811 SMLoc EndLoc; 2812 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2813 return MatchOperand_ParseFail; 2814 } else { 2815 if (Parser.parseExpression(Expr)) 2816 return MatchOperand_ParseFail; 2817 } 2818 2819 if (Expr->evaluateAsAbsolute(IntVal)) { 2820 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2821 } else { 2822 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2823 } 2824 2825 return MatchOperand_Success; 2826 } 2827 2828 return MatchOperand_NoMatch; 2829 } 2830 2831 OperandMatchResultTy 2832 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2833 if (!isRegister()) 2834 return MatchOperand_NoMatch; 2835 2836 if (auto R = parseRegister()) { 2837 assert(R->isReg()); 2838 Operands.push_back(std::move(R)); 2839 return MatchOperand_Success; 2840 } 2841 return MatchOperand_ParseFail; 2842 } 2843 2844 OperandMatchResultTy 2845 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2846 auto res = parseReg(Operands); 2847 if (res != MatchOperand_NoMatch) { 2848 return res; 2849 } else if (isModifier()) { 2850 return MatchOperand_NoMatch; 2851 } else { 2852 return parseImm(Operands, HasSP3AbsMod); 2853 } 2854 } 2855 2856 bool 2857 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2858 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2859 const auto &str = Token.getString(); 2860 return str == "abs" || str == "neg" || str == "sext"; 2861 } 2862 return false; 2863 } 2864 2865 bool 2866 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2867 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2868 } 2869 2870 bool 2871 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2872 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2873 } 2874 2875 bool 2876 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2877 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2878 } 2879 2880 // Check if this is an operand modifier or an opcode modifier 2881 // which may look like an expression but it is not. We should 2882 // avoid parsing these modifiers as expressions. Currently 2883 // recognized sequences are: 2884 // |...| 2885 // abs(...) 2886 // neg(...) 2887 // sext(...) 2888 // -reg 2889 // -|...| 2890 // -abs(...) 2891 // name:... 2892 // Note that simple opcode modifiers like 'gds' may be parsed as 2893 // expressions; this is a special case. See getExpressionAsToken. 2894 // 2895 bool 2896 AMDGPUAsmParser::isModifier() { 2897 2898 AsmToken Tok = getToken(); 2899 AsmToken NextToken[2]; 2900 peekTokens(NextToken); 2901 2902 return isOperandModifier(Tok, NextToken[0]) || 2903 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2904 isOpcodeModifierWithVal(Tok, NextToken[0]); 2905 } 2906 2907 // Check if the current token is an SP3 'neg' modifier. 2908 // Currently this modifier is allowed in the following context: 2909 // 2910 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2911 // 2. Before an 'abs' modifier: -abs(...) 2912 // 3. Before an SP3 'abs' modifier: -|...| 2913 // 2914 // In all other cases "-" is handled as a part 2915 // of an expression that follows the sign. 
2916 //
2917 // Note: When "-" is followed by an integer literal,
2918 // this is interpreted as integer negation rather
2919 // than a floating-point NEG modifier applied to N.
2920 // Besides being counter-intuitive, such use of a floating-point
2921 // NEG modifier would have resulted in different meanings
2922 // of integer literals used with VOP1/2/C and VOP3,
2923 // for example:
2924 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2925 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2926 // Negative fp literals with preceding "-" are
2927 // handled likewise for uniformity.
2928 //
2929 bool
2930 AMDGPUAsmParser::parseSP3NegModifier() {
2931
2932 AsmToken NextToken[2];
2933 peekTokens(NextToken);
2934
2935 if (isToken(AsmToken::Minus) &&
2936 (isRegister(NextToken[0], NextToken[1]) ||
2937 NextToken[0].is(AsmToken::Pipe) ||
2938 isId(NextToken[0], "abs"))) {
2939 lex();
2940 return true;
2941 }
2942
2943 return false;
2944 }
2945
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2948 bool AllowImm) {
2949 bool Neg, SP3Neg;
2950 bool Abs, SP3Abs;
2951 SMLoc Loc;
2952
2953 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2954 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2955 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2956 return MatchOperand_ParseFail;
2957 }
2958
2959 SP3Neg = parseSP3NegModifier();
2960
2961 Loc = getLoc();
2962 Neg = trySkipId("neg");
2963 if (Neg && SP3Neg) {
2964 Error(Loc, "expected register or immediate");
2965 return MatchOperand_ParseFail;
2966 }
2967 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2968 return MatchOperand_ParseFail;
2969
2970 Abs = trySkipId("abs");
2971 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2972 return MatchOperand_ParseFail;
2973
2974 Loc = getLoc();
2975 SP3Abs = trySkipToken(AsmToken::Pipe);
2976 if (Abs && SP3Abs) {
2977 Error(Loc, "expected register or immediate");
2978 return MatchOperand_ParseFail;
2979 }
2980
2981 OperandMatchResultTy Res;
2982 if (AllowImm) {
2983 Res = parseRegOrImm(Operands, SP3Abs);
2984 } else {
2985 Res = parseReg(Operands);
2986 }
2987 if (Res != MatchOperand_Success) {
2988 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2989 } 2990 2991 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2992 return MatchOperand_ParseFail; 2993 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2994 return MatchOperand_ParseFail; 2995 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2996 return MatchOperand_ParseFail; 2997 2998 AMDGPUOperand::Modifiers Mods; 2999 Mods.Abs = Abs || SP3Abs; 3000 Mods.Neg = Neg || SP3Neg; 3001 3002 if (Mods.hasFPModifiers()) { 3003 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3004 if (Op.isExpr()) { 3005 Error(Op.getStartLoc(), "expected an absolute expression"); 3006 return MatchOperand_ParseFail; 3007 } 3008 Op.setModifiers(Mods); 3009 } 3010 return MatchOperand_Success; 3011 } 3012 3013 OperandMatchResultTy 3014 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3015 bool AllowImm) { 3016 bool Sext = trySkipId("sext"); 3017 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3018 return MatchOperand_ParseFail; 3019 3020 OperandMatchResultTy Res; 3021 if (AllowImm) { 3022 Res = parseRegOrImm(Operands); 3023 } else { 3024 Res = parseReg(Operands); 3025 } 3026 if (Res != MatchOperand_Success) { 3027 return Sext? MatchOperand_ParseFail : Res; 3028 } 3029 3030 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3031 return MatchOperand_ParseFail; 3032 3033 AMDGPUOperand::Modifiers Mods; 3034 Mods.Sext = Sext; 3035 3036 if (Mods.hasIntModifiers()) { 3037 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3038 if (Op.isExpr()) { 3039 Error(Op.getStartLoc(), "expected an absolute expression"); 3040 return MatchOperand_ParseFail; 3041 } 3042 Op.setModifiers(Mods); 3043 } 3044 3045 return MatchOperand_Success; 3046 } 3047 3048 OperandMatchResultTy 3049 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3050 return parseRegOrImmWithFPInputMods(Operands, false); 3051 } 3052 3053 OperandMatchResultTy 3054 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3055 return parseRegOrImmWithIntInputMods(Operands, false); 3056 } 3057 3058 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3059 auto Loc = getLoc(); 3060 if (trySkipId("off")) { 3061 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3062 AMDGPUOperand::ImmTyOff, false)); 3063 return MatchOperand_Success; 3064 } 3065 3066 if (!isRegister()) 3067 return MatchOperand_NoMatch; 3068 3069 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3070 if (Reg) { 3071 Operands.push_back(std::move(Reg)); 3072 return MatchOperand_Success; 3073 } 3074 3075 return MatchOperand_ParseFail; 3076 3077 } 3078 3079 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3080 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3081 3082 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3083 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3084 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3085 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3086 return Match_InvalidOperand; 3087 3088 if ((TSFlags & SIInstrFlags::VOP3) && 3089 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3090 getForcedEncodingSize() != 64) 3091 return Match_PreferE32; 3092 3093 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3094 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3095 // v_mac_f32/16 allow only dst_sel == DWORD; 3096 auto OpNum = 3097 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3098 const auto &Op = Inst.getOperand(OpNum); 3099 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3100 return Match_InvalidOperand; 3101 } 3102 } 3103 3104 return Match_Success; 3105 } 3106 3107 static ArrayRef<unsigned> getAllVariants() { 3108 static const unsigned Variants[] = { 3109 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3110 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3111 }; 3112 3113 return makeArrayRef(Variants); 3114 } 3115 3116 // What asm variants we should check 3117 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3118 if (getForcedEncodingSize() == 32) { 3119 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3120 return makeArrayRef(Variants); 3121 } 3122 3123 if (isForcedVOP3()) { 3124 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3125 return makeArrayRef(Variants); 3126 } 3127 3128 if (isForcedSDWA()) { 3129 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3130 AMDGPUAsmVariants::SDWA9}; 3131 return makeArrayRef(Variants); 3132 } 3133 3134 if (isForcedDPP()) { 3135 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3136 return makeArrayRef(Variants); 3137 } 3138 3139 return getAllVariants(); 3140 } 3141 3142 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3143 if (getForcedEncodingSize() == 32) 3144 return "e32"; 3145 3146 if (isForcedVOP3()) 3147 return "e64"; 3148 3149 if (isForcedSDWA()) 3150 return "sdwa"; 3151 3152 if (isForcedDPP()) 3153 return "dpp"; 3154 3155 return ""; 3156 } 3157 3158 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3159 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3160 const unsigned Num = Desc.getNumImplicitUses(); 3161 for (unsigned i = 0; i < Num; ++i) { 3162 unsigned Reg = Desc.ImplicitUses[i]; 3163 switch (Reg) { 3164 case AMDGPU::FLAT_SCR: 3165 case AMDGPU::VCC: 3166 case AMDGPU::VCC_LO: 3167 case AMDGPU::VCC_HI: 3168 case AMDGPU::M0: 3169 return Reg; 3170 default: 3171 break; 3172 } 3173 } 3174 return AMDGPU::NoRegister; 3175 } 3176 3177 // NB: This code is correct only when used to check constant 3178 // bus limitations because GFX7 support no f16 inline constants. 3179 // Note that there are no cases when a GFX7 opcode violates 3180 // constant bus limitations due to the use of an f16 constant. 
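// For example, for a 32-bit source operand an immediate of 64 or -16 is an
// inline constant and does not occupy the constant bus, while 100 falls
// outside the inline range and is counted as a literal by usesConstantBus().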
3181 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3182 unsigned OpIdx) const { 3183 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3184 3185 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3186 return false; 3187 } 3188 3189 const MCOperand &MO = Inst.getOperand(OpIdx); 3190 3191 int64_t Val = MO.getImm(); 3192 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3193 3194 switch (OpSize) { // expected operand size 3195 case 8: 3196 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3197 case 4: 3198 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3199 case 2: { 3200 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3201 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3202 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3203 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3204 return AMDGPU::isInlinableIntLiteral(Val); 3205 3206 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3207 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3208 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3209 return AMDGPU::isInlinableIntLiteralV216(Val); 3210 3211 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3212 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3213 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3214 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3215 3216 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3217 } 3218 default: 3219 llvm_unreachable("invalid operand size"); 3220 } 3221 } 3222 3223 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3224 if (!isGFX10Plus()) 3225 return 1; 3226 3227 switch (Opcode) { 3228 // 64-bit shift instructions can use only one scalar value input 3229 case AMDGPU::V_LSHLREV_B64_e64: 3230 case AMDGPU::V_LSHLREV_B64_gfx10: 3231 case AMDGPU::V_LSHRREV_B64_e64: 3232 case AMDGPU::V_LSHRREV_B64_gfx10: 3233 case AMDGPU::V_ASHRREV_I64_e64: 3234 case AMDGPU::V_ASHRREV_I64_gfx10: 3235 case AMDGPU::V_LSHL_B64_e64: 3236 case AMDGPU::V_LSHR_B64_e64: 3237 case AMDGPU::V_ASHR_I64_e64: 3238 return 1; 3239 default: 3240 return 2; 3241 } 3242 } 3243 3244 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3245 const MCOperand &MO = Inst.getOperand(OpIdx); 3246 if (MO.isImm()) { 3247 return !isInlineConstant(Inst, OpIdx); 3248 } else if (MO.isReg()) { 3249 auto Reg = MO.getReg(); 3250 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3251 auto PReg = mc2PseudoReg(Reg); 3252 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3253 } else { 3254 return true; 3255 } 3256 } 3257 3258 bool 3259 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3260 const OperandVector &Operands) { 3261 const unsigned Opcode = Inst.getOpcode(); 3262 const MCInstrDesc &Desc = MII.get(Opcode); 3263 unsigned LastSGPR = AMDGPU::NoRegister; 3264 unsigned ConstantBusUseCount = 0; 3265 unsigned NumLiterals = 0; 3266 unsigned LiteralSize; 3267 3268 if (Desc.TSFlags & 3269 (SIInstrFlags::VOPC | 3270 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3271 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3272 SIInstrFlags::SDWA)) { 3273 // Check special imm operands (used by madmk, etc) 3274 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3275 ++NumLiterals; 3276 LiteralSize = 4; 3277 } 3278 3279 SmallDenseSet<unsigned> SGPRsUsed; 3280 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3281 if (SGPRUsed != AMDGPU::NoRegister) { 3282 SGPRsUsed.insert(SGPRUsed); 3283 ++ConstantBusUseCount; 3284 } 3285 3286 
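// For example, "v_add_f32_e64 v0, s0, s1" reads two different SGPRs and
// exceeds a constant bus limit of 1 on pre-GFX10 targets, whereas
// "v_add_f32_e64 v0, s0, s0" reuses the same SGPR and is accepted.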
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3287 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3288 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3289
3290 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3291
3292 for (int OpIdx : OpIndices) {
3293 if (OpIdx == -1) break;
3294
3295 const MCOperand &MO = Inst.getOperand(OpIdx);
3296 if (usesConstantBus(Inst, OpIdx)) {
3297 if (MO.isReg()) {
3298 LastSGPR = mc2PseudoReg(MO.getReg());
3299 // Pairs of registers with a partial intersection like these:
3300 // s0, s[0:1]
3301 // flat_scratch_lo, flat_scratch
3302 // flat_scratch_lo, flat_scratch_hi
3303 // are theoretically valid but are disabled anyway.
3304 // Note that this code mimics SIInstrInfo::verifyInstruction.
3305 if (!SGPRsUsed.count(LastSGPR)) {
3306 SGPRsUsed.insert(LastSGPR);
3307 ++ConstantBusUseCount;
3308 }
3309 } else { // Expression or a literal
3310
3311 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3312 continue; // special operand like VINTERP attr_chan
3313
3314 // An instruction may use only one literal.
3315 // This has been validated in a previous step.
3316 // See validateVOPLiteral.
3317 // This literal may be used as more than one operand.
3318 // If all these operands are of the same size,
3319 // this literal counts as one scalar value.
3320 // Otherwise it counts as 2 scalar values.
3321 // See "GFX10 Shader Programming", section 3.6.2.3.
3322
3323 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3324 if (Size < 4) Size = 4;
3325
3326 if (NumLiterals == 0) {
3327 NumLiterals = 1;
3328 LiteralSize = Size;
3329 } else if (LiteralSize != Size) {
3330 NumLiterals = 2;
3331 }
3332 }
3333 }
3334 }
3335 }
3336 ConstantBusUseCount += NumLiterals;
3337
3338 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3339 return true;
3340
3341 SMLoc LitLoc = getLitLoc(Operands);
3342 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3343 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3344 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3345 return false; 3346 } 3347 3348 bool 3349 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3350 const OperandVector &Operands) { 3351 const unsigned Opcode = Inst.getOpcode(); 3352 const MCInstrDesc &Desc = MII.get(Opcode); 3353 3354 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3355 if (DstIdx == -1 || 3356 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3357 return true; 3358 } 3359 3360 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3361 3362 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3363 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3364 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3365 3366 assert(DstIdx != -1); 3367 const MCOperand &Dst = Inst.getOperand(DstIdx); 3368 assert(Dst.isReg()); 3369 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3370 3371 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3372 3373 for (int SrcIdx : SrcIndices) { 3374 if (SrcIdx == -1) break; 3375 const MCOperand &Src = Inst.getOperand(SrcIdx); 3376 if (Src.isReg()) { 3377 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3378 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3379 Error(getRegLoc(SrcReg, Operands), 3380 "destination must be different than all sources"); 3381 return false; 3382 } 3383 } 3384 } 3385 3386 return true; 3387 } 3388 3389 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3390 3391 const unsigned Opc = Inst.getOpcode(); 3392 const MCInstrDesc &Desc = MII.get(Opc); 3393 3394 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3395 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3396 assert(ClampIdx != -1); 3397 return Inst.getOperand(ClampIdx).getImm() == 0; 3398 } 3399 3400 return true; 3401 } 3402 3403 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3404 3405 const unsigned Opc = Inst.getOpcode(); 3406 const MCInstrDesc &Desc = MII.get(Opc); 3407 3408 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3409 return true; 3410 3411 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3412 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3413 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3414 3415 assert(VDataIdx != -1); 3416 3417 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3418 return true; 3419 3420 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3421 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3422 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3423 if (DMask == 0) 3424 DMask = 1; 3425 3426 unsigned DataSize = 3427 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3428 if (hasPackedD16()) { 3429 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3430 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3431 DataSize = (DataSize + 1) / 2; 3432 } 3433 3434 return (VDataSize / 4) == DataSize + TFESize; 3435 } 3436 3437 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3438 const unsigned Opc = Inst.getOpcode(); 3439 const MCInstrDesc &Desc = MII.get(Opc); 3440 3441 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3442 return true; 3443 3444 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3445 3446 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3447 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3448 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3449 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3450 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3451 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3452 3453 assert(VAddr0Idx != -1); 3454 assert(SrsrcIdx != -1); 3455 assert(SrsrcIdx > VAddr0Idx); 3456 3457 if (DimIdx == -1) 3458 return true; // intersect_ray 3459 3460 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3461 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3462 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3463 unsigned ActualAddrSize = 3464 IsNSA ? SrsrcIdx - VAddr0Idx 3465 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3466 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3467 3468 unsigned ExpectedAddrSize = 3469 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3470 3471 if (!IsNSA) { 3472 if (ExpectedAddrSize > 8) 3473 ExpectedAddrSize = 16; 3474 3475 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3476 // This provides backward compatibility for assembly created 3477 // before 160b/192b/224b types were directly supported. 3478 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3479 return true; 3480 } 3481 3482 return ActualAddrSize == ExpectedAddrSize; 3483 } 3484 3485 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3486 3487 const unsigned Opc = Inst.getOpcode(); 3488 const MCInstrDesc &Desc = MII.get(Opc); 3489 3490 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3491 return true; 3492 if (!Desc.mayLoad() || !Desc.mayStore()) 3493 return true; // Not atomic 3494 3495 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3496 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3497 3498 // This is an incomplete check because image_atomic_cmpswap 3499 // may only use 0x3 and 0xf while other atomic operations 3500 // may use 0x1 and 0x3. However these limitations are 3501 // verified when we check that dmask matches dst size. 3502 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3503 } 3504 3505 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3506 3507 const unsigned Opc = Inst.getOpcode(); 3508 const MCInstrDesc &Desc = MII.get(Opc); 3509 3510 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3511 return true; 3512 3513 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3514 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3515 3516 // GATHER4 instructions use dmask in a different fashion compared to 3517 // other MIMG instructions. The only useful DMASK values are 3518 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3519 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3520 // this. 3521 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3522 } 3523 3524 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3525 const unsigned Opc = Inst.getOpcode(); 3526 const MCInstrDesc &Desc = MII.get(Opc); 3527 3528 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3529 return true; 3530 3531 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3532 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3533 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3534 3535 if (!BaseOpcode->MSAA) 3536 return true; 3537 3538 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3539 assert(DimIdx != -1); 3540 3541 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3542 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3543 3544 return DimInfo->MSAA; 3545 } 3546 3547 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3548 { 3549 switch (Opcode) { 3550 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3551 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3552 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3553 return true; 3554 default: 3555 return false; 3556 } 3557 } 3558 3559 // movrels* opcodes should only allow VGPRS as src0. 3560 // This is specified in .td description for vop1/vop3, 3561 // but sdwa is handled differently. See isSDWAOperand. 3562 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3563 const OperandVector &Operands) { 3564 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3569 return true; 3570 3571 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3572 assert(Src0Idx != -1); 3573 3574 SMLoc ErrLoc; 3575 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3576 if (Src0.isReg()) { 3577 auto Reg = mc2PseudoReg(Src0.getReg()); 3578 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3579 if (!isSGPR(Reg, TRI)) 3580 return true; 3581 ErrLoc = getRegLoc(Reg, Operands); 3582 } else { 3583 ErrLoc = getConstLoc(Operands); 3584 } 3585 3586 Error(ErrLoc, "source operand must be a VGPR"); 3587 return false; 3588 } 3589 3590 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3591 const OperandVector &Operands) { 3592 3593 const unsigned Opc = Inst.getOpcode(); 3594 3595 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3596 return true; 3597 3598 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3599 assert(Src0Idx != -1); 3600 3601 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3602 if (!Src0.isReg()) 3603 return true; 3604 3605 auto Reg = mc2PseudoReg(Src0.getReg()); 3606 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3607 if (isSGPR(Reg, TRI)) { 3608 Error(getRegLoc(Reg, Operands), 3609 "source operand must be either a VGPR or an inline constant"); 3610 return false; 3611 } 3612 3613 return true; 3614 } 3615 3616 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3617 switch (Inst.getOpcode()) { 3618 default: 3619 return true; 3620 case V_DIV_SCALE_F32_gfx6_gfx7: 3621 case V_DIV_SCALE_F32_vi: 3622 case V_DIV_SCALE_F32_gfx10: 3623 case V_DIV_SCALE_F64_gfx6_gfx7: 3624 case V_DIV_SCALE_F64_vi: 3625 case V_DIV_SCALE_F64_gfx10: 3626 break; 3627 } 3628 3629 // TODO: Check that src0 = src1 or src2. 
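// The checks below reject the ABS source modifier on v_div_scale_*: for
// example, "v_div_scale_f32 v0, vcc, |v1|, v1, v2" is invalid, while plain
// or 'neg'-modified sources are not rejected here.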
  for (auto Name : {AMDGPU::OpName::src0_modifiers,
                    AMDGPU::OpName::src1_modifiers,
                    AMDGPU::OpName::src2_modifiers}) {
    if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
            .getImm() &
        SISrcMods::ABS) {
      return false;
    }
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
  if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
    if (isCI() || isSI())
      return false;
  }

  return true;
}

bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  if (DimIdx < 0)
    return true;

  long Imm = Inst.getOperand(DimIdx).getImm();
  if (Imm < 0 || Imm >= 8)
    return false;

  return true;
}

static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case
AMDGPU::V_ASHRREV_I32_e64: 3746 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3747 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3748 case AMDGPU::V_ASHRREV_I32_e32_vi: 3749 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3750 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3751 case AMDGPU::V_ASHRREV_I32_e64_vi: 3752 3753 case AMDGPU::V_LSHLREV_B32_e32: 3754 case AMDGPU::V_LSHLREV_B32_e64: 3755 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3756 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3757 case AMDGPU::V_LSHLREV_B32_e32_vi: 3758 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3759 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3760 case AMDGPU::V_LSHLREV_B32_e64_vi: 3761 3762 case AMDGPU::V_LSHLREV_B16_e32: 3763 case AMDGPU::V_LSHLREV_B16_e64: 3764 case AMDGPU::V_LSHLREV_B16_e32_vi: 3765 case AMDGPU::V_LSHLREV_B16_e64_vi: 3766 case AMDGPU::V_LSHLREV_B16_gfx10: 3767 3768 case AMDGPU::V_LSHRREV_B16_e32: 3769 case AMDGPU::V_LSHRREV_B16_e64: 3770 case AMDGPU::V_LSHRREV_B16_e32_vi: 3771 case AMDGPU::V_LSHRREV_B16_e64_vi: 3772 case AMDGPU::V_LSHRREV_B16_gfx10: 3773 3774 case AMDGPU::V_ASHRREV_I16_e32: 3775 case AMDGPU::V_ASHRREV_I16_e64: 3776 case AMDGPU::V_ASHRREV_I16_e32_vi: 3777 case AMDGPU::V_ASHRREV_I16_e64_vi: 3778 case AMDGPU::V_ASHRREV_I16_gfx10: 3779 3780 case AMDGPU::V_LSHLREV_B64_e64: 3781 case AMDGPU::V_LSHLREV_B64_gfx10: 3782 case AMDGPU::V_LSHLREV_B64_vi: 3783 3784 case AMDGPU::V_LSHRREV_B64_e64: 3785 case AMDGPU::V_LSHRREV_B64_gfx10: 3786 case AMDGPU::V_LSHRREV_B64_vi: 3787 3788 case AMDGPU::V_ASHRREV_I64_e64: 3789 case AMDGPU::V_ASHRREV_I64_gfx10: 3790 case AMDGPU::V_ASHRREV_I64_vi: 3791 3792 case AMDGPU::V_PK_LSHLREV_B16: 3793 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3794 case AMDGPU::V_PK_LSHLREV_B16_vi: 3795 3796 case AMDGPU::V_PK_LSHRREV_B16: 3797 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3798 case AMDGPU::V_PK_LSHRREV_B16_vi: 3799 case AMDGPU::V_PK_ASHRREV_I16: 3800 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3801 case AMDGPU::V_PK_ASHRREV_I16_vi: 3802 return true; 3803 default: 3804 return false; 3805 } 3806 } 3807 3808 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3809 3810 using namespace SIInstrFlags; 3811 const unsigned Opcode = Inst.getOpcode(); 3812 const MCInstrDesc &Desc = MII.get(Opcode); 3813 3814 // lds_direct register is defined so that it can be used 3815 // with 9-bit operands only. Ignore encodings which do not accept these. 
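 // The loop below then rejects lds_direct on gfx90a, on 'rev' and SDWA
 // opcodes, and in any source position other than src0.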
3816 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3817 if ((Desc.TSFlags & Enc) == 0) 3818 return None; 3819 3820 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3821 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3822 if (SrcIdx == -1) 3823 break; 3824 const auto &Src = Inst.getOperand(SrcIdx); 3825 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3826 3827 if (isGFX90A()) 3828 return StringRef("lds_direct is not supported on this GPU"); 3829 3830 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3831 return StringRef("lds_direct cannot be used with this instruction"); 3832 3833 if (SrcName != OpName::src0) 3834 return StringRef("lds_direct may be used as src0 only"); 3835 } 3836 } 3837 3838 return None; 3839 } 3840 3841 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3842 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3843 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3844 if (Op.isFlatOffset()) 3845 return Op.getStartLoc(); 3846 } 3847 return getLoc(); 3848 } 3849 3850 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3851 const OperandVector &Operands) { 3852 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3853 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3854 return true; 3855 3856 auto Opcode = Inst.getOpcode(); 3857 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3858 assert(OpNum != -1); 3859 3860 const auto &Op = Inst.getOperand(OpNum); 3861 if (!hasFlatOffsets() && Op.getImm() != 0) { 3862 Error(getFlatOffsetLoc(Operands), 3863 "flat offset modifier is not supported on this GPU"); 3864 return false; 3865 } 3866 3867 // For FLAT segment the offset must be positive; 3868 // MSB is ignored and forced to zero. 3869 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3870 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3871 if (!isIntN(OffsetSize, Op.getImm())) { 3872 Error(getFlatOffsetLoc(Operands), 3873 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3874 return false; 3875 } 3876 } else { 3877 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3878 if (!isUIntN(OffsetSize, Op.getImm())) { 3879 Error(getFlatOffsetLoc(Operands), 3880 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3881 return false; 3882 } 3883 } 3884 3885 return true; 3886 } 3887 3888 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3889 // Start with second operand because SMEM Offset cannot be dst or src0. 
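  // Operands[0] is the mnemonic token, so scanning from index 2 skips the
  // mnemonic and the first explicit operand.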
3890 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3891 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3892 if (Op.isSMEMOffset()) 3893 return Op.getStartLoc(); 3894 } 3895 return getLoc(); 3896 } 3897 3898 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3899 const OperandVector &Operands) { 3900 if (isCI() || isSI()) 3901 return true; 3902 3903 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3904 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3905 return true; 3906 3907 auto Opcode = Inst.getOpcode(); 3908 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3909 if (OpNum == -1) 3910 return true; 3911 3912 const auto &Op = Inst.getOperand(OpNum); 3913 if (!Op.isImm()) 3914 return true; 3915 3916 uint64_t Offset = Op.getImm(); 3917 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3918 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3919 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3920 return true; 3921 3922 Error(getSMEMOffsetLoc(Operands), 3923 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3924 "expected a 21-bit signed offset"); 3925 3926 return false; 3927 } 3928 3929 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3930 unsigned Opcode = Inst.getOpcode(); 3931 const MCInstrDesc &Desc = MII.get(Opcode); 3932 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3933 return true; 3934 3935 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3936 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3937 3938 const int OpIndices[] = { Src0Idx, Src1Idx }; 3939 3940 unsigned NumExprs = 0; 3941 unsigned NumLiterals = 0; 3942 uint32_t LiteralValue; 3943 3944 for (int OpIdx : OpIndices) { 3945 if (OpIdx == -1) break; 3946 3947 const MCOperand &MO = Inst.getOperand(OpIdx); 3948 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3949 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3950 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3951 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3952 if (NumLiterals == 0 || LiteralValue != Value) { 3953 LiteralValue = Value; 3954 ++NumLiterals; 3955 } 3956 } else if (MO.isExpr()) { 3957 ++NumExprs; 3958 } 3959 } 3960 } 3961 3962 return NumLiterals + NumExprs <= 1; 3963 } 3964 3965 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3966 const unsigned Opc = Inst.getOpcode(); 3967 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3968 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3969 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3970 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3971 3972 if (OpSel & ~3) 3973 return false; 3974 } 3975 return true; 3976 } 3977 3978 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3979 const OperandVector &Operands) { 3980 const unsigned Opc = Inst.getOpcode(); 3981 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3982 if (DppCtrlIdx < 0) 3983 return true; 3984 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3985 3986 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3987 // DPP64 is supported for row_newbcast only. 
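    // Approximate "64-bit operand" by testing whether src0 has a sub1
    // subregister, i.e. spans more than one 32-bit register.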
3988 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3989 if (Src0Idx >= 0 && 3990 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3991 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3992 Error(S, "64 bit dpp only supports row_newbcast"); 3993 return false; 3994 } 3995 } 3996 3997 return true; 3998 } 3999 4000 // Check if VCC register matches wavefront size 4001 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4002 auto FB = getFeatureBits(); 4003 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4004 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4005 } 4006 4007 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4008 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4009 const OperandVector &Operands) { 4010 unsigned Opcode = Inst.getOpcode(); 4011 const MCInstrDesc &Desc = MII.get(Opcode); 4012 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4013 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4014 ImmIdx == -1) 4015 return true; 4016 4017 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4018 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4019 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4020 4021 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4022 4023 unsigned NumExprs = 0; 4024 unsigned NumLiterals = 0; 4025 uint32_t LiteralValue; 4026 4027 for (int OpIdx : OpIndices) { 4028 if (OpIdx == -1) 4029 continue; 4030 4031 const MCOperand &MO = Inst.getOperand(OpIdx); 4032 if (!MO.isImm() && !MO.isExpr()) 4033 continue; 4034 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4035 continue; 4036 4037 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4038 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4039 Error(getConstLoc(Operands), 4040 "inline constants are not allowed for this operand"); 4041 return false; 4042 } 4043 4044 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4045 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4046 if (NumLiterals == 0 || LiteralValue != Value) { 4047 LiteralValue = Value; 4048 ++NumLiterals; 4049 } 4050 } else if (MO.isExpr()) { 4051 ++NumExprs; 4052 } 4053 } 4054 NumLiterals += NumExprs; 4055 4056 if (!NumLiterals) 4057 return true; 4058 4059 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4060 Error(getLitLoc(Operands), "literal operands are not supported"); 4061 return false; 4062 } 4063 4064 if (NumLiterals > 1) { 4065 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4066 return false; 4067 } 4068 4069 return true; 4070 } 4071 4072 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4073 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4074 const MCRegisterInfo *MRI) { 4075 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4076 if (OpIdx < 0) 4077 return -1; 4078 4079 const MCOperand &Op = Inst.getOperand(OpIdx); 4080 if (!Op.isReg()) 4081 return -1; 4082 4083 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4084 auto Reg = Sub ? Sub : Op.getReg(); 4085 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4086 return AGPR32.contains(Reg) ? 
1 : 0; 4087 } 4088 4089 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4090 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4091 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4092 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4093 SIInstrFlags::DS)) == 0) 4094 return true; 4095 4096 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4097 : AMDGPU::OpName::vdata; 4098 4099 const MCRegisterInfo *MRI = getMRI(); 4100 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4101 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4102 4103 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4104 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4105 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4106 return false; 4107 } 4108 4109 auto FB = getFeatureBits(); 4110 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4111 if (DataAreg < 0 || DstAreg < 0) 4112 return true; 4113 return DstAreg == DataAreg; 4114 } 4115 4116 return DstAreg < 1 && DataAreg < 1; 4117 } 4118 4119 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4120 auto FB = getFeatureBits(); 4121 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4122 return true; 4123 4124 const MCRegisterInfo *MRI = getMRI(); 4125 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4126 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4127 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4128 const MCOperand &Op = Inst.getOperand(I); 4129 if (!Op.isReg()) 4130 continue; 4131 4132 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4133 if (!Sub) 4134 continue; 4135 4136 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4137 return false; 4138 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4139 return false; 4140 } 4141 4142 return true; 4143 } 4144 4145 // gfx90a has an undocumented limitation: 4146 // DS_GWS opcodes must use even aligned registers. 4147 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4148 const OperandVector &Operands) { 4149 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4150 return true; 4151 4152 int Opc = Inst.getOpcode(); 4153 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4154 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4155 return true; 4156 4157 const MCRegisterInfo *MRI = getMRI(); 4158 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4159 int Data0Pos = 4160 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4161 assert(Data0Pos != -1); 4162 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4163 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4164 if (RegIdx & 1) { 4165 SMLoc RegLoc = getRegLoc(Reg, Operands); 4166 Error(RegLoc, "vgpr must be even aligned"); 4167 return false; 4168 } 4169 4170 return true; 4171 } 4172 4173 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4174 const OperandVector &Operands, 4175 const SMLoc &IDLoc) { 4176 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4177 AMDGPU::OpName::cpol); 4178 if (CPolPos == -1) 4179 return true; 4180 4181 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4182 4183 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4184 if ((TSFlags & (SIInstrFlags::SMRD)) && 4185 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4186 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4187 return false; 4188 } 4189 4190 if (isGFX90A() && (CPol & CPol::SCC)) { 4191 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4192 StringRef CStr(S.getPointer()); 4193 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4194 Error(S, "scc is not supported on this GPU"); 4195 return false; 4196 } 4197 4198 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4199 return true; 4200 4201 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4202 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4203 Error(IDLoc, "instruction must use glc"); 4204 return false; 4205 } 4206 } else { 4207 if (CPol & CPol::GLC) { 4208 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4209 StringRef CStr(S.getPointer()); 4210 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4211 Error(S, "instruction must not use glc"); 4212 return false; 4213 } 4214 } 4215 4216 return true; 4217 } 4218 4219 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4220 const SMLoc &IDLoc, 4221 const OperandVector &Operands) { 4222 if (auto ErrMsg = validateLdsDirect(Inst)) { 4223 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4224 return false; 4225 } 4226 if (!validateSOPLiteral(Inst)) { 4227 Error(getLitLoc(Operands), 4228 "only one literal operand is allowed"); 4229 return false; 4230 } 4231 if (!validateVOPLiteral(Inst, Operands)) { 4232 return false; 4233 } 4234 if (!validateConstantBusLimitations(Inst, Operands)) { 4235 return false; 4236 } 4237 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4238 return false; 4239 } 4240 if (!validateIntClampSupported(Inst)) { 4241 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4242 "integer clamping is not supported on this GPU"); 4243 return false; 4244 } 4245 if (!validateOpSel(Inst)) { 4246 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4247 "invalid op_sel operand"); 4248 return false; 4249 } 4250 if (!validateDPP(Inst, Operands)) { 4251 return false; 4252 } 4253 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4254 if (!validateMIMGD16(Inst)) { 4255 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4256 "d16 modifier is not supported on this GPU"); 4257 return false; 4258 } 4259 if (!validateMIMGDim(Inst)) { 4260 Error(IDLoc, "dim modifier is required on this GPU"); 4261 return false; 4262 } 4263 if (!validateMIMGMSAA(Inst)) { 4264 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4265 "invalid dim; must be MSAA type"); 4266 return false; 4267 } 4268 if (!validateMIMGDataSize(Inst)) { 4269 Error(IDLoc, 4270 "image data size does not match dmask and tfe"); 4271 return false; 4272 } 4273 if (!validateMIMGAddrSize(Inst)) { 4274 Error(IDLoc, 4275 "image address size does not match dim and a16"); 4276 return false; 4277 } 4278 if (!validateMIMGAtomicDMask(Inst)) { 4279 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4280 "invalid atomic image dmask"); 4281 return false; 4282 } 4283 if (!validateMIMGGatherDMask(Inst)) { 4284 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4285 "invalid image_gather dmask: only one bit must be set"); 4286 return false; 4287 } 4288 if (!validateMovrels(Inst, Operands)) { 4289 return false; 4290 } 4291 if (!validateFlatOffset(Inst, Operands)) { 4292 return false; 4293 } 4294 if (!validateSMEMOffset(Inst, Operands)) { 4295 return false; 4296 } 4297 if (!validateMAIAccWrite(Inst, Operands)) { 4298 return false; 4299 } 4300 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4301 return false; 4302 } 4303 4304 if (!validateAGPRLdSt(Inst)) { 4305 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4306 ? "invalid register class: data and dst should be all VGPR or AGPR" 4307 : "invalid register class: agpr loads and stores not supported on this GPU" 4308 ); 4309 return false; 4310 } 4311 if (!validateVGPRAlign(Inst)) { 4312 Error(IDLoc, 4313 "invalid register class: vgpr tuples must be 64 bit aligned"); 4314 return false; 4315 } 4316 if (!validateGWS(Inst, Operands)) { 4317 return false; 4318 } 4319 4320 if (!validateDivScale(Inst)) { 4321 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4322 return false; 4323 } 4324 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4325 return false; 4326 } 4327 4328 return true; 4329 } 4330 4331 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4332 const FeatureBitset &FBS, 4333 unsigned VariantID = 0); 4334 4335 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4336 const FeatureBitset &AvailableFeatures, 4337 unsigned VariantID); 4338 4339 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4340 const FeatureBitset &FBS) { 4341 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4342 } 4343 4344 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4345 const FeatureBitset &FBS, 4346 ArrayRef<unsigned> Variants) { 4347 for (auto Variant : Variants) { 4348 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4349 return true; 4350 } 4351 4352 return false; 4353 } 4354 4355 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4356 const SMLoc &IDLoc) { 4357 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4358 4359 // Check if requested instruction variant is supported. 4360 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4361 return false; 4362 4363 // This instruction is not supported. 4364 // Clear any other pending errors because they are no longer relevant. 4365 getParser().clearPendingErrors(); 4366 4367 // Requested instruction variant is not supported. 4368 // Check if any other variants are supported. 
4369 StringRef VariantName = getMatchedVariantName(); 4370 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4371 return Error(IDLoc, 4372 Twine(VariantName, 4373 " variant of this instruction is not supported")); 4374 } 4375 4376 // Finally check if this instruction is supported on any other GPU. 4377 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4378 return Error(IDLoc, "instruction not supported on this GPU"); 4379 } 4380 4381 // Instruction not supported on any GPU. Probably a typo. 4382 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4383 return Error(IDLoc, "invalid instruction" + Suggestion); 4384 } 4385 4386 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4387 OperandVector &Operands, 4388 MCStreamer &Out, 4389 uint64_t &ErrorInfo, 4390 bool MatchingInlineAsm) { 4391 MCInst Inst; 4392 unsigned Result = Match_Success; 4393 for (auto Variant : getMatchedVariants()) { 4394 uint64_t EI; 4395 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4396 Variant); 4397 // We order match statuses from least to most specific. We use most specific 4398 // status as resulting 4399 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4400 if ((R == Match_Success) || 4401 (R == Match_PreferE32) || 4402 (R == Match_MissingFeature && Result != Match_PreferE32) || 4403 (R == Match_InvalidOperand && Result != Match_MissingFeature 4404 && Result != Match_PreferE32) || 4405 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4406 && Result != Match_MissingFeature 4407 && Result != Match_PreferE32)) { 4408 Result = R; 4409 ErrorInfo = EI; 4410 } 4411 if (R == Match_Success) 4412 break; 4413 } 4414 4415 if (Result == Match_Success) { 4416 if (!validateInstruction(Inst, IDLoc, Operands)) { 4417 return true; 4418 } 4419 Inst.setLoc(IDLoc); 4420 Out.emitInstruction(Inst, getSTI()); 4421 return false; 4422 } 4423 4424 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4425 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4426 return true; 4427 } 4428 4429 switch (Result) { 4430 default: break; 4431 case Match_MissingFeature: 4432 // It has been verified that the specified instruction 4433 // mnemonic is valid. A match was found but it requires 4434 // features which are not supported on this GPU. 
4435 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4436 4437 case Match_InvalidOperand: { 4438 SMLoc ErrorLoc = IDLoc; 4439 if (ErrorInfo != ~0ULL) { 4440 if (ErrorInfo >= Operands.size()) { 4441 return Error(IDLoc, "too few operands for instruction"); 4442 } 4443 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4444 if (ErrorLoc == SMLoc()) 4445 ErrorLoc = IDLoc; 4446 } 4447 return Error(ErrorLoc, "invalid operand for instruction"); 4448 } 4449 4450 case Match_PreferE32: 4451 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4452 "should be encoded as e32"); 4453 case Match_MnemonicFail: 4454 llvm_unreachable("Invalid instructions should have been handled already"); 4455 } 4456 llvm_unreachable("Implement any new match types added!"); 4457 } 4458 4459 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4460 int64_t Tmp = -1; 4461 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4462 return true; 4463 } 4464 if (getParser().parseAbsoluteExpression(Tmp)) { 4465 return true; 4466 } 4467 Ret = static_cast<uint32_t>(Tmp); 4468 return false; 4469 } 4470 4471 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4472 uint32_t &Minor) { 4473 if (ParseAsAbsoluteExpression(Major)) 4474 return TokError("invalid major version"); 4475 4476 if (!trySkipToken(AsmToken::Comma)) 4477 return TokError("minor version number required, comma expected"); 4478 4479 if (ParseAsAbsoluteExpression(Minor)) 4480 return TokError("invalid minor version"); 4481 4482 return false; 4483 } 4484 4485 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4486 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4487 return TokError("directive only supported for amdgcn architecture"); 4488 4489 std::string TargetIDDirective; 4490 SMLoc TargetStart = getTok().getLoc(); 4491 if (getParser().parseEscapedString(TargetIDDirective)) 4492 return true; 4493 4494 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4495 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4496 return getParser().Error(TargetRange.Start, 4497 (Twine(".amdgcn_target directive's target id ") + 4498 Twine(TargetIDDirective) + 4499 Twine(" does not match the specified target id ") + 4500 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4501 4502 return false; 4503 } 4504 4505 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4506 return Error(Range.Start, "value out of range", Range); 4507 } 4508 4509 bool AMDGPUAsmParser::calculateGPRBlocks( 4510 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4511 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4512 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4513 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4514 // TODO(scott.linder): These calculations are duplicated from 4515 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
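  // The "blocks" computed here are the granulated register counts that are
  // later written into COMPUTE_PGM_RSRC1 (GRANULATED_WORKITEM_VGPR_COUNT and
  // GRANULATED_WAVEFRONT_SGPR_COUNT), not raw register counts.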
4516 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4517 4518 unsigned NumVGPRs = NextFreeVGPR; 4519 unsigned NumSGPRs = NextFreeSGPR; 4520 4521 if (Version.Major >= 10) 4522 NumSGPRs = 0; 4523 else { 4524 unsigned MaxAddressableNumSGPRs = 4525 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4526 4527 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4528 NumSGPRs > MaxAddressableNumSGPRs) 4529 return OutOfRangeError(SGPRRange); 4530 4531 NumSGPRs += 4532 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4533 4534 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4535 NumSGPRs > MaxAddressableNumSGPRs) 4536 return OutOfRangeError(SGPRRange); 4537 4538 if (Features.test(FeatureSGPRInitBug)) 4539 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4540 } 4541 4542 VGPRBlocks = 4543 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4544 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4545 4546 return false; 4547 } 4548 4549 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4550 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4551 return TokError("directive only supported for amdgcn architecture"); 4552 4553 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4554 return TokError("directive only supported for amdhsa OS"); 4555 4556 StringRef KernelName; 4557 if (getParser().parseIdentifier(KernelName)) 4558 return true; 4559 4560 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4561 4562 StringSet<> Seen; 4563 4564 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4565 4566 SMRange VGPRRange; 4567 uint64_t NextFreeVGPR = 0; 4568 uint64_t AccumOffset = 0; 4569 SMRange SGPRRange; 4570 uint64_t NextFreeSGPR = 0; 4571 unsigned UserSGPRCount = 0; 4572 bool ReserveVCC = true; 4573 bool ReserveFlatScr = true; 4574 Optional<bool> EnableWavefrontSize32; 4575 4576 while (true) { 4577 while (trySkipToken(AsmToken::EndOfStatement)); 4578 4579 StringRef ID; 4580 SMRange IDRange = getTok().getLocRange(); 4581 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4582 return true; 4583 4584 if (ID == ".end_amdhsa_kernel") 4585 break; 4586 4587 if (Seen.find(ID) != Seen.end()) 4588 return TokError(".amdhsa_ directives cannot be repeated"); 4589 Seen.insert(ID); 4590 4591 SMLoc ValStart = getLoc(); 4592 int64_t IVal; 4593 if (getParser().parseAbsoluteExpression(IVal)) 4594 return true; 4595 SMLoc ValEnd = getLoc(); 4596 SMRange ValRange = SMRange(ValStart, ValEnd); 4597 4598 if (IVal < 0) 4599 return OutOfRangeError(ValRange); 4600 4601 uint64_t Val = IVal; 4602 4603 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4604 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4605 return OutOfRangeError(RANGE); \ 4606 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4607 4608 if (ID == ".amdhsa_group_segment_fixed_size") { 4609 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4610 return OutOfRangeError(ValRange); 4611 KD.group_segment_fixed_size = Val; 4612 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4613 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4614 return OutOfRangeError(ValRange); 4615 KD.private_segment_fixed_size = Val; 4616 } else if (ID == ".amdhsa_kernarg_size") { 4617 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4618 return OutOfRangeError(ValRange); 4619 KD.kernarg_size = Val; 4620 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4621 if (hasArchitectedFlatScratch()) 4622 return 
Error(IDRange.Start, 4623 "directive is not supported with architected flat scratch", 4624 IDRange); 4625 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4626 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4627 Val, ValRange); 4628 if (Val) 4629 UserSGPRCount += 4; 4630 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4631 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4632 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4633 ValRange); 4634 if (Val) 4635 UserSGPRCount += 2; 4636 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4637 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4638 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4639 ValRange); 4640 if (Val) 4641 UserSGPRCount += 2; 4642 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4643 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4644 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4645 Val, ValRange); 4646 if (Val) 4647 UserSGPRCount += 2; 4648 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4649 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4650 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4651 ValRange); 4652 if (Val) 4653 UserSGPRCount += 2; 4654 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4655 if (hasArchitectedFlatScratch()) 4656 return Error(IDRange.Start, 4657 "directive is not supported with architected flat scratch", 4658 IDRange); 4659 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4660 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4661 ValRange); 4662 if (Val) 4663 UserSGPRCount += 2; 4664 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4665 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4666 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4667 Val, ValRange); 4668 if (Val) 4669 UserSGPRCount += 1; 4670 } else if (ID == ".amdhsa_wavefront_size32") { 4671 if (IVersion.Major < 10) 4672 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4673 EnableWavefrontSize32 = Val; 4674 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4675 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4676 Val, ValRange); 4677 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4678 if (hasArchitectedFlatScratch()) 4679 return Error(IDRange.Start, 4680 "directive is not supported with architected flat scratch", 4681 IDRange); 4682 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4683 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4684 } else if (ID == ".amdhsa_enable_private_segment") { 4685 if (!hasArchitectedFlatScratch()) 4686 return Error( 4687 IDRange.Start, 4688 "directive is not supported without architected flat scratch", 4689 IDRange); 4690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4691 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4692 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4693 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4694 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4695 ValRange); 4696 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4697 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4698 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4699 ValRange); 4700 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4701 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4702 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4703 ValRange); 4704 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4706 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4707 ValRange); 4708 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4709 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4710 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4711 ValRange); 4712 } else if (ID == ".amdhsa_next_free_vgpr") { 4713 VGPRRange = ValRange; 4714 NextFreeVGPR = Val; 4715 } else if (ID == ".amdhsa_next_free_sgpr") { 4716 SGPRRange = ValRange; 4717 NextFreeSGPR = Val; 4718 } else if (ID == ".amdhsa_accum_offset") { 4719 if (!isGFX90A()) 4720 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4721 AccumOffset = Val; 4722 } else if (ID == ".amdhsa_reserve_vcc") { 4723 if (!isUInt<1>(Val)) 4724 return OutOfRangeError(ValRange); 4725 ReserveVCC = Val; 4726 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4727 if (IVersion.Major < 7) 4728 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4729 if (hasArchitectedFlatScratch()) 4730 return Error(IDRange.Start, 4731 "directive is not supported with architected flat scratch", 4732 IDRange); 4733 if (!isUInt<1>(Val)) 4734 return OutOfRangeError(ValRange); 4735 ReserveFlatScr = Val; 4736 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4737 if (IVersion.Major < 8) 4738 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4739 if (!isUInt<1>(Val)) 4740 return OutOfRangeError(ValRange); 4741 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4742 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4743 IDRange); 4744 } else if (ID == ".amdhsa_float_round_mode_32") { 4745 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4746 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4747 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4748 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4749 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4750 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4751 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4752 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4753 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4754 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4755 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4756 ValRange); 4757 } else if (ID == ".amdhsa_dx10_clamp") { 4758 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4759 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4760 } else if (ID == ".amdhsa_ieee_mode") { 4761 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4762 Val, ValRange); 4763 } else if (ID == ".amdhsa_fp16_overflow") { 4764 if (IVersion.Major < 9) 4765 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4767 ValRange); 4768 } else if (ID == ".amdhsa_tg_split") { 4769 if (!isGFX90A()) 4770 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4771 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4772 ValRange); 4773 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4774 if (IVersion.Major < 10) 4775 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4776 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4777 ValRange); 4778 } else if (ID == ".amdhsa_memory_ordered") { 4779 if (IVersion.Major < 10) 4780 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4781 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4782 ValRange); 4783 } else if (ID == ".amdhsa_forward_progress") { 4784 if (IVersion.Major < 10) 4785 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4786 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4787 ValRange); 4788 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4789 PARSE_BITS_ENTRY( 4790 KD.compute_pgm_rsrc2, 4791 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4792 ValRange); 4793 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4794 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4795 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4796 Val, ValRange); 4797 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4798 PARSE_BITS_ENTRY( 4799 KD.compute_pgm_rsrc2, 4800 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4801 ValRange); 4802 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4804 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4805 Val, ValRange); 4806 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4807 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4808 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4809 Val, ValRange); 4810 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4811 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4812 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4813 Val, ValRange); 4814 } else if (ID == ".amdhsa_exception_int_div_zero") { 4815 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4816 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4817 Val, ValRange); 4818 } else { 4819 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4820 } 4821 4822 #undef PARSE_BITS_ENTRY 4823 } 4824 4825 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4826 return TokError(".amdhsa_next_free_vgpr directive is required"); 4827 4828 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4829 return TokError(".amdhsa_next_free_sgpr directive is required"); 4830 4831 unsigned VGPRBlocks; 4832 unsigned SGPRBlocks; 4833 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4834 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4835 EnableWavefrontSize32, NextFreeVGPR, 4836 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4837 SGPRBlocks)) 4838 return true; 4839 4840 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4841 VGPRBlocks)) 4842 return OutOfRangeError(VGPRRange); 4843 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4844 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4845 4846 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4847 SGPRBlocks)) 4848 return OutOfRangeError(SGPRRange); 4849 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4850 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4851 SGPRBlocks); 4852 4853 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4854 return TokError("too many user SGPRs enabled"); 4855 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4856 UserSGPRCount); 4857 4858 if (isGFX90A()) { 4859 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4860 return TokError(".amdhsa_accum_offset directive is required"); 4861 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4862 return TokError("accum_offset should be in range [4..256] in " 4863 "increments of 4"); 4864 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4865 return TokError("accum_offset exceeds total VGPR allocation"); 4866 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4867 (AccumOffset / 4 - 1)); 4868 } 4869 4870 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4871 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, 
ReserveVCC, 4872 ReserveFlatScr); 4873 return false; 4874 } 4875 4876 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4877 uint32_t Major; 4878 uint32_t Minor; 4879 4880 if (ParseDirectiveMajorMinor(Major, Minor)) 4881 return true; 4882 4883 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4884 return false; 4885 } 4886 4887 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4888 uint32_t Major; 4889 uint32_t Minor; 4890 uint32_t Stepping; 4891 StringRef VendorName; 4892 StringRef ArchName; 4893 4894 // If this directive has no arguments, then use the ISA version for the 4895 // targeted GPU. 4896 if (isToken(AsmToken::EndOfStatement)) { 4897 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4898 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4899 ISA.Stepping, 4900 "AMD", "AMDGPU"); 4901 return false; 4902 } 4903 4904 if (ParseDirectiveMajorMinor(Major, Minor)) 4905 return true; 4906 4907 if (!trySkipToken(AsmToken::Comma)) 4908 return TokError("stepping version number required, comma expected"); 4909 4910 if (ParseAsAbsoluteExpression(Stepping)) 4911 return TokError("invalid stepping version"); 4912 4913 if (!trySkipToken(AsmToken::Comma)) 4914 return TokError("vendor name required, comma expected"); 4915 4916 if (!parseString(VendorName, "invalid vendor name")) 4917 return true; 4918 4919 if (!trySkipToken(AsmToken::Comma)) 4920 return TokError("arch name required, comma expected"); 4921 4922 if (!parseString(ArchName, "invalid arch name")) 4923 return true; 4924 4925 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4926 VendorName, ArchName); 4927 return false; 4928 } 4929 4930 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4931 amd_kernel_code_t &Header) { 4932 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4933 // assembly for backwards compatibility. 
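  // (Entries inside .amd_kernel_code_t take the form "name = value", e.g.
  // "wavefront_size = 6" as checked further below.)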
4934 if (ID == "max_scratch_backing_memory_byte_size") { 4935 Parser.eatToEndOfStatement(); 4936 return false; 4937 } 4938 4939 SmallString<40> ErrStr; 4940 raw_svector_ostream Err(ErrStr); 4941 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4942 return TokError(Err.str()); 4943 } 4944 Lex(); 4945 4946 if (ID == "enable_wavefront_size32") { 4947 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4948 if (!isGFX10Plus()) 4949 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4950 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4951 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4952 } else { 4953 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4954 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4955 } 4956 } 4957 4958 if (ID == "wavefront_size") { 4959 if (Header.wavefront_size == 5) { 4960 if (!isGFX10Plus()) 4961 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4962 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4963 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4964 } else if (Header.wavefront_size == 6) { 4965 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4966 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4967 } 4968 } 4969 4970 if (ID == "enable_wgp_mode") { 4971 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4972 !isGFX10Plus()) 4973 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4974 } 4975 4976 if (ID == "enable_mem_ordered") { 4977 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4978 !isGFX10Plus()) 4979 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4980 } 4981 4982 if (ID == "enable_fwd_progress") { 4983 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4984 !isGFX10Plus()) 4985 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4986 } 4987 4988 return false; 4989 } 4990 4991 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4992 amd_kernel_code_t Header; 4993 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4994 4995 while (true) { 4996 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4997 // will set the current token to EndOfStatement. 
4998 while(trySkipToken(AsmToken::EndOfStatement)); 4999 5000 StringRef ID; 5001 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5002 return true; 5003 5004 if (ID == ".end_amd_kernel_code_t") 5005 break; 5006 5007 if (ParseAMDKernelCodeTValue(ID, Header)) 5008 return true; 5009 } 5010 5011 getTargetStreamer().EmitAMDKernelCodeT(Header); 5012 5013 return false; 5014 } 5015 5016 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5017 StringRef KernelName; 5018 if (!parseId(KernelName, "expected symbol name")) 5019 return true; 5020 5021 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5022 ELF::STT_AMDGPU_HSA_KERNEL); 5023 5024 KernelScope.initialize(getContext()); 5025 return false; 5026 } 5027 5028 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5029 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5030 return Error(getLoc(), 5031 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5032 "architectures"); 5033 } 5034 5035 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5036 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5037 return Error(getParser().getTok().getLoc(), "target id must match options"); 5038 5039 getTargetStreamer().EmitISAVersion(); 5040 Lex(); 5041 5042 return false; 5043 } 5044 5045 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5046 const char *AssemblerDirectiveBegin; 5047 const char *AssemblerDirectiveEnd; 5048 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5049 isHsaAbiVersion3Or4(&getSTI()) 5050 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5051 HSAMD::V3::AssemblerDirectiveEnd) 5052 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5053 HSAMD::AssemblerDirectiveEnd); 5054 5055 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5056 return Error(getLoc(), 5057 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5058 "not available on non-amdhsa OSes")).str()); 5059 } 5060 5061 std::string HSAMetadataString; 5062 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5063 HSAMetadataString)) 5064 return true; 5065 5066 if (isHsaAbiVersion3Or4(&getSTI())) { 5067 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5068 return Error(getLoc(), "invalid HSA metadata"); 5069 } else { 5070 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5071 return Error(getLoc(), "invalid HSA metadata"); 5072 } 5073 5074 return false; 5075 } 5076 5077 /// Common code to parse out a block of text (typically YAML) between start and 5078 /// end directives. 
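/// Used by the HSA metadata and MsgPack PAL metadata directives: everything
/// between the begin and end directive is collected, statement by statement,
/// into CollectString.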
5079 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5080 const char *AssemblerDirectiveEnd, 5081 std::string &CollectString) { 5082 5083 raw_string_ostream CollectStream(CollectString); 5084 5085 getLexer().setSkipSpace(false); 5086 5087 bool FoundEnd = false; 5088 while (!isToken(AsmToken::Eof)) { 5089 while (isToken(AsmToken::Space)) { 5090 CollectStream << getTokenStr(); 5091 Lex(); 5092 } 5093 5094 if (trySkipId(AssemblerDirectiveEnd)) { 5095 FoundEnd = true; 5096 break; 5097 } 5098 5099 CollectStream << Parser.parseStringToEndOfStatement() 5100 << getContext().getAsmInfo()->getSeparatorString(); 5101 5102 Parser.eatToEndOfStatement(); 5103 } 5104 5105 getLexer().setSkipSpace(true); 5106 5107 if (isToken(AsmToken::Eof) && !FoundEnd) { 5108 return TokError(Twine("expected directive ") + 5109 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5110 } 5111 5112 CollectStream.flush(); 5113 return false; 5114 } 5115 5116 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5117 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5118 std::string String; 5119 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5120 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5121 return true; 5122 5123 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5124 if (!PALMetadata->setFromString(String)) 5125 return Error(getLoc(), "invalid PAL metadata"); 5126 return false; 5127 } 5128 5129 /// Parse the assembler directive for old linear-format PAL metadata. 5130 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5131 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5132 return Error(getLoc(), 5133 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5134 "not available on non-amdpal OSes")).str()); 5135 } 5136 5137 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5138 PALMetadata->setLegacy(); 5139 for (;;) { 5140 uint32_t Key, Value; 5141 if (ParseAsAbsoluteExpression(Key)) { 5142 return TokError(Twine("invalid value in ") + 5143 Twine(PALMD::AssemblerDirective)); 5144 } 5145 if (!trySkipToken(AsmToken::Comma)) { 5146 return TokError(Twine("expected an even number of values in ") + 5147 Twine(PALMD::AssemblerDirective)); 5148 } 5149 if (ParseAsAbsoluteExpression(Value)) { 5150 return TokError(Twine("invalid value in ") + 5151 Twine(PALMD::AssemblerDirective)); 5152 } 5153 PALMetadata->setRegister(Key, Value); 5154 if (!trySkipToken(AsmToken::Comma)) 5155 break; 5156 } 5157 return false; 5158 } 5159 5160 /// ParseDirectiveAMDGPULDS 5161 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5162 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5163 if (getParser().checkForValidSection()) 5164 return true; 5165 5166 StringRef Name; 5167 SMLoc NameLoc = getLoc(); 5168 if (getParser().parseIdentifier(Name)) 5169 return TokError("expected identifier in directive"); 5170 5171 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5172 if (parseToken(AsmToken::Comma, "expected ','")) 5173 return true; 5174 5175 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5176 5177 int64_t Size; 5178 SMLoc SizeLoc = getLoc(); 5179 if (getParser().parseAbsoluteExpression(Size)) 5180 return true; 5181 if (Size < 0) 5182 return Error(SizeLoc, "size must be non-negative"); 5183 if (Size > LocalMemorySize) 5184 return Error(SizeLoc, "size is too large"); 5185 5186 int64_t Alignment = 4; 5187 if (trySkipToken(AsmToken::Comma)) { 5188 SMLoc AlignLoc = getLoc(); 5189 if 
(getParser().parseAbsoluteExpression(Alignment)) 5190 return true; 5191 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5192 return Error(AlignLoc, "alignment must be a power of two"); 5193 5194 // Alignment larger than the size of LDS is possible in theory, as long 5195 // as the linker manages to place to symbol at address 0, but we do want 5196 // to make sure the alignment fits nicely into a 32-bit integer. 5197 if (Alignment >= 1u << 31) 5198 return Error(AlignLoc, "alignment is too large"); 5199 } 5200 5201 if (parseToken(AsmToken::EndOfStatement, 5202 "unexpected token in '.amdgpu_lds' directive")) 5203 return true; 5204 5205 Symbol->redefineIfPossible(); 5206 if (!Symbol->isUndefined()) 5207 return Error(NameLoc, "invalid symbol redefinition"); 5208 5209 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5210 return false; 5211 } 5212 5213 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5214 StringRef IDVal = DirectiveID.getString(); 5215 5216 if (isHsaAbiVersion3Or4(&getSTI())) { 5217 if (IDVal == ".amdhsa_kernel") 5218 return ParseDirectiveAMDHSAKernel(); 5219 5220 // TODO: Restructure/combine with PAL metadata directive. 5221 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5222 return ParseDirectiveHSAMetadata(); 5223 } else { 5224 if (IDVal == ".hsa_code_object_version") 5225 return ParseDirectiveHSACodeObjectVersion(); 5226 5227 if (IDVal == ".hsa_code_object_isa") 5228 return ParseDirectiveHSACodeObjectISA(); 5229 5230 if (IDVal == ".amd_kernel_code_t") 5231 return ParseDirectiveAMDKernelCodeT(); 5232 5233 if (IDVal == ".amdgpu_hsa_kernel") 5234 return ParseDirectiveAMDGPUHsaKernel(); 5235 5236 if (IDVal == ".amd_amdgpu_isa") 5237 return ParseDirectiveISAVersion(); 5238 5239 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5240 return ParseDirectiveHSAMetadata(); 5241 } 5242 5243 if (IDVal == ".amdgcn_target") 5244 return ParseDirectiveAMDGCNTarget(); 5245 5246 if (IDVal == ".amdgpu_lds") 5247 return ParseDirectiveAMDGPULDS(); 5248 5249 if (IDVal == PALMD::AssemblerDirectiveBegin) 5250 return ParseDirectivePALMetadataBegin(); 5251 5252 if (IDVal == PALMD::AssemblerDirective) 5253 return ParseDirectivePALMetadata(); 5254 5255 return true; 5256 } 5257 5258 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5259 unsigned RegNo) { 5260 5261 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5262 R.isValid(); ++R) { 5263 if (*R == RegNo) 5264 return isGFX9Plus(); 5265 } 5266 5267 // GFX10 has 2 more SGPRs 104 and 105. 5268 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5269 R.isValid(); ++R) { 5270 if (*R == RegNo) 5271 return hasSGPR104_SGPR105(); 5272 } 5273 5274 switch (RegNo) { 5275 case AMDGPU::SRC_SHARED_BASE: 5276 case AMDGPU::SRC_SHARED_LIMIT: 5277 case AMDGPU::SRC_PRIVATE_BASE: 5278 case AMDGPU::SRC_PRIVATE_LIMIT: 5279 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5280 return isGFX9Plus(); 5281 case AMDGPU::TBA: 5282 case AMDGPU::TBA_LO: 5283 case AMDGPU::TBA_HI: 5284 case AMDGPU::TMA: 5285 case AMDGPU::TMA_LO: 5286 case AMDGPU::TMA_HI: 5287 return !isGFX9Plus(); 5288 case AMDGPU::XNACK_MASK: 5289 case AMDGPU::XNACK_MASK_LO: 5290 case AMDGPU::XNACK_MASK_HI: 5291 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5292 case AMDGPU::SGPR_NULL: 5293 return isGFX10Plus(); 5294 default: 5295 break; 5296 } 5297 5298 if (isCI()) 5299 return true; 5300 5301 if (isSI() || isGFX10Plus()) { 5302 // No flat_scr on SI. 
5303 // On GFX10 flat scratch is not a valid register operand and can only be 5304 // accessed with s_setreg/s_getreg. 5305 switch (RegNo) { 5306 case AMDGPU::FLAT_SCR: 5307 case AMDGPU::FLAT_SCR_LO: 5308 case AMDGPU::FLAT_SCR_HI: 5309 return false; 5310 default: 5311 return true; 5312 } 5313 } 5314 5315 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5316 // SI/CI have. 5317 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5318 R.isValid(); ++R) { 5319 if (*R == RegNo) 5320 return hasSGPR102_SGPR103(); 5321 } 5322 5323 return true; 5324 } 5325 5326 OperandMatchResultTy 5327 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5328 OperandMode Mode) { 5329 // Try to parse with a custom parser 5330 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5331 5332 // If we successfully parsed the operand or if there was an error parsing, 5333 // we are done. 5334 // 5335 // If we are parsing after we reach EndOfStatement then this means we 5336 // are appending default values to the Operands list. This is only done 5337 // by custom parser, so we shouldn't continue on to the generic parsing. 5338 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5339 isToken(AsmToken::EndOfStatement)) 5340 return ResTy; 5341 5342 SMLoc RBraceLoc; 5343 SMLoc LBraceLoc = getLoc(); 5344 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5345 unsigned Prefix = Operands.size(); 5346 5347 for (;;) { 5348 auto Loc = getLoc(); 5349 ResTy = parseReg(Operands); 5350 if (ResTy == MatchOperand_NoMatch) 5351 Error(Loc, "expected a register"); 5352 if (ResTy != MatchOperand_Success) 5353 return MatchOperand_ParseFail; 5354 5355 RBraceLoc = getLoc(); 5356 if (trySkipToken(AsmToken::RBrac)) 5357 break; 5358 5359 if (!skipToken(AsmToken::Comma, 5360 "expected a comma or a closing square bracket")) { 5361 return MatchOperand_ParseFail; 5362 } 5363 } 5364 5365 if (Operands.size() - Prefix > 1) { 5366 Operands.insert(Operands.begin() + Prefix, 5367 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5368 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5369 } 5370 5371 return MatchOperand_Success; 5372 } 5373 5374 return parseRegOrImm(Operands); 5375 } 5376 5377 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5378 // Clear any forced encodings from the previous instruction.
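// For example (illustrative): "v_add_f32_e64" forces the 64-bit encoding,
// "v_add_f32_e32" the 32-bit encoding, "v_mov_b32_dpp" forces DPP, and
// "v_add_f16_sdwa" forces SDWA; in each case the suffix is stripped from
// the returned mnemonic.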
5379 setForcedEncodingSize(0); 5380 setForcedDPP(false); 5381 setForcedSDWA(false); 5382 5383 if (Name.endswith("_e64")) { 5384 setForcedEncodingSize(64); 5385 return Name.substr(0, Name.size() - 4); 5386 } else if (Name.endswith("_e32")) { 5387 setForcedEncodingSize(32); 5388 return Name.substr(0, Name.size() - 4); 5389 } else if (Name.endswith("_dpp")) { 5390 setForcedDPP(true); 5391 return Name.substr(0, Name.size() - 4); 5392 } else if (Name.endswith("_sdwa")) { 5393 setForcedSDWA(true); 5394 return Name.substr(0, Name.size() - 5); 5395 } 5396 return Name; 5397 } 5398 5399 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5400 StringRef Name, 5401 SMLoc NameLoc, OperandVector &Operands) { 5402 // Add the instruction mnemonic 5403 Name = parseMnemonicSuffix(Name); 5404 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5405 5406 bool IsMIMG = Name.startswith("image_"); 5407 5408 while (!trySkipToken(AsmToken::EndOfStatement)) { 5409 OperandMode Mode = OperandMode_Default; 5410 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5411 Mode = OperandMode_NSA; 5412 CPolSeen = 0; 5413 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5414 5415 if (Res != MatchOperand_Success) { 5416 checkUnsupportedInstruction(Name, NameLoc); 5417 if (!Parser.hasPendingError()) { 5418 // FIXME: use real operand location rather than the current location. 5419 StringRef Msg = 5420 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5421 "not a valid operand."; 5422 Error(getLoc(), Msg); 5423 } 5424 while (!trySkipToken(AsmToken::EndOfStatement)) { 5425 lex(); 5426 } 5427 return true; 5428 } 5429 5430 // Eat the comma or space if there is one. 5431 trySkipToken(AsmToken::Comma); 5432 } 5433 5434 return false; 5435 } 5436 5437 //===----------------------------------------------------------------------===// 5438 // Utility functions 5439 //===----------------------------------------------------------------------===// 5440 5441 OperandMatchResultTy 5442 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5443 5444 if (!trySkipId(Prefix, AsmToken::Colon)) 5445 return MatchOperand_NoMatch; 5446 5447 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5448 } 5449 5450 OperandMatchResultTy 5451 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5452 AMDGPUOperand::ImmTy ImmTy, 5453 bool (*ConvertResult)(int64_t&)) { 5454 SMLoc S = getLoc(); 5455 int64_t Value = 0; 5456 5457 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5458 if (Res != MatchOperand_Success) 5459 return Res; 5460 5461 if (ConvertResult && !ConvertResult(Value)) { 5462 Error(S, "invalid " + StringRef(Prefix) + " value."); 5463 } 5464 5465 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5466 return MatchOperand_Success; 5467 } 5468 5469 OperandMatchResultTy 5470 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5471 OperandVector &Operands, 5472 AMDGPUOperand::ImmTy ImmTy, 5473 bool (*ConvertResult)(int64_t&)) { 5474 SMLoc S = getLoc(); 5475 if (!trySkipId(Prefix, AsmToken::Colon)) 5476 return MatchOperand_NoMatch; 5477 5478 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5479 return MatchOperand_ParseFail; 5480 5481 unsigned Val = 0; 5482 const unsigned MaxSize = 4; 5483 5484 // FIXME: How to verify the number of elements matches the number of src 5485 // operands? 
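// A rough sketch of the syntax accepted by the loop below (illustrative):
// operands such as op_sel:[0,1] or neg_lo:[1,0,0,1] -- a bracketed list of
// at most four 0/1 values, packed into a single bitmask immediate.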
5486 for (int I = 0; ; ++I) { 5487 int64_t Op; 5488 SMLoc Loc = getLoc(); 5489 if (!parseExpr(Op)) 5490 return MatchOperand_ParseFail; 5491 5492 if (Op != 0 && Op != 1) { 5493 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5494 return MatchOperand_ParseFail; 5495 } 5496 5497 Val |= (Op << I); 5498 5499 if (trySkipToken(AsmToken::RBrac)) 5500 break; 5501 5502 if (I + 1 == MaxSize) { 5503 Error(getLoc(), "expected a closing square bracket"); 5504 return MatchOperand_ParseFail; 5505 } 5506 5507 if (!skipToken(AsmToken::Comma, "expected a comma")) 5508 return MatchOperand_ParseFail; 5509 } 5510 5511 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5512 return MatchOperand_Success; 5513 } 5514 5515 OperandMatchResultTy 5516 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5517 AMDGPUOperand::ImmTy ImmTy) { 5518 int64_t Bit; 5519 SMLoc S = getLoc(); 5520 5521 if (trySkipId(Name)) { 5522 Bit = 1; 5523 } else if (trySkipId("no", Name)) { 5524 Bit = 0; 5525 } else { 5526 return MatchOperand_NoMatch; 5527 } 5528 5529 if (Name == "r128" && !hasMIMG_R128()) { 5530 Error(S, "r128 modifier is not supported on this GPU"); 5531 return MatchOperand_ParseFail; 5532 } 5533 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5534 Error(S, "a16 modifier is not supported on this GPU"); 5535 return MatchOperand_ParseFail; 5536 } 5537 5538 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5539 ImmTy = AMDGPUOperand::ImmTyR128A16; 5540 5541 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5542 return MatchOperand_Success; 5543 } 5544 5545 OperandMatchResultTy 5546 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5547 unsigned CPolOn = 0; 5548 unsigned CPolOff = 0; 5549 SMLoc S = getLoc(); 5550 5551 if (trySkipId("glc")) 5552 CPolOn = AMDGPU::CPol::GLC; 5553 else if (trySkipId("noglc")) 5554 CPolOff = AMDGPU::CPol::GLC; 5555 else if (trySkipId("slc")) 5556 CPolOn = AMDGPU::CPol::SLC; 5557 else if (trySkipId("noslc")) 5558 CPolOff = AMDGPU::CPol::SLC; 5559 else if (trySkipId("dlc")) 5560 CPolOn = AMDGPU::CPol::DLC; 5561 else if (trySkipId("nodlc")) 5562 CPolOff = AMDGPU::CPol::DLC; 5563 else if (trySkipId("scc")) 5564 CPolOn = AMDGPU::CPol::SCC; 5565 else if (trySkipId("noscc")) 5566 CPolOff = AMDGPU::CPol::SCC; 5567 else 5568 return MatchOperand_NoMatch; 5569 5570 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5571 Error(S, "dlc modifier is not supported on this GPU"); 5572 return MatchOperand_ParseFail; 5573 } 5574 5575 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5576 Error(S, "scc modifier is not supported on this GPU"); 5577 return MatchOperand_ParseFail; 5578 } 5579 5580 if (CPolSeen & (CPolOn | CPolOff)) { 5581 Error(S, "duplicate cache policy modifier"); 5582 return MatchOperand_ParseFail; 5583 } 5584 5585 CPolSeen |= (CPolOn | CPolOff); 5586 5587 for (unsigned I = 1; I != Operands.size(); ++I) { 5588 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5589 if (Op.isCPol()) { 5590 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5591 return MatchOperand_Success; 5592 } 5593 } 5594 5595 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5596 AMDGPUOperand::ImmTyCPol)); 5597 5598 return MatchOperand_Success; 5599 } 5600 5601 static void addOptionalImmOperand( 5602 MCInst& Inst, const OperandVector& Operands, 5603 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5604 AMDGPUOperand::ImmTy ImmT, 5605 int64_t Default = 0) { 5606 auto i = OptionalIdx.find(ImmT); 5607 if (i != OptionalIdx.end()) { 
5608 unsigned Idx = i->second; 5609 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5610 } else { 5611 Inst.addOperand(MCOperand::createImm(Default)); 5612 } 5613 } 5614 5615 OperandMatchResultTy 5616 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5617 StringRef &Value, 5618 SMLoc &StringLoc) { 5619 if (!trySkipId(Prefix, AsmToken::Colon)) 5620 return MatchOperand_NoMatch; 5621 5622 StringLoc = getLoc(); 5623 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5624 : MatchOperand_ParseFail; 5625 } 5626 5627 //===----------------------------------------------------------------------===// 5628 // MTBUF format 5629 //===----------------------------------------------------------------------===// 5630 5631 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5632 int64_t MaxVal, 5633 int64_t &Fmt) { 5634 int64_t Val; 5635 SMLoc Loc = getLoc(); 5636 5637 auto Res = parseIntWithPrefix(Pref, Val); 5638 if (Res == MatchOperand_ParseFail) 5639 return false; 5640 if (Res == MatchOperand_NoMatch) 5641 return true; 5642 5643 if (Val < 0 || Val > MaxVal) { 5644 Error(Loc, Twine("out of range ", StringRef(Pref))); 5645 return false; 5646 } 5647 5648 Fmt = Val; 5649 return true; 5650 } 5651 5652 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5653 // values to live in a joint format operand in the MCInst encoding. 5654 OperandMatchResultTy 5655 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5656 using namespace llvm::AMDGPU::MTBUFFormat; 5657 5658 int64_t Dfmt = DFMT_UNDEF; 5659 int64_t Nfmt = NFMT_UNDEF; 5660 5661 // dfmt and nfmt can appear in either order, and each is optional. 5662 for (int I = 0; I < 2; ++I) { 5663 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5664 return MatchOperand_ParseFail; 5665 5666 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5667 return MatchOperand_ParseFail; 5668 } 5669 // Skip optional comma between dfmt/nfmt 5670 // but guard against 2 commas following each other. 5671 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5672 !peekToken().is(AsmToken::Comma)) { 5673 trySkipToken(AsmToken::Comma); 5674 } 5675 } 5676 5677 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5678 return MatchOperand_NoMatch; 5679 5680 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5681 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5682 5683 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5684 return MatchOperand_Success; 5685 } 5686 5687 OperandMatchResultTy 5688 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5689 using namespace llvm::AMDGPU::MTBUFFormat; 5690 5691 int64_t Fmt = UFMT_UNDEF; 5692 5693 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5694 return MatchOperand_ParseFail; 5695 5696 if (Fmt == UFMT_UNDEF) 5697 return MatchOperand_NoMatch; 5698 5699 Format = Fmt; 5700 return MatchOperand_Success; 5701 } 5702 5703 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5704 int64_t &Nfmt, 5705 StringRef FormatStr, 5706 SMLoc Loc) { 5707 using namespace llvm::AMDGPU::MTBUFFormat; 5708 int64_t Format; 5709 5710 Format = getDfmt(FormatStr); 5711 if (Format != DFMT_UNDEF) { 5712 Dfmt = Format; 5713 return true; 5714 } 5715 5716 Format = getNfmt(FormatStr, getSTI()); 5717 if (Format != NFMT_UNDEF) { 5718 Nfmt = Format; 5719 return true; 5720 } 5721 5722 Error(Loc, "unsupported format"); 5723 return false; 5724 } 5725 5726 OperandMatchResultTy 5727 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5728 SMLoc FormatLoc, 5729 int64_t &Format) { 5730 using namespace llvm::AMDGPU::MTBUFFormat; 5731 5732 int64_t Dfmt = DFMT_UNDEF; 5733 int64_t Nfmt = NFMT_UNDEF; 5734 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5735 return MatchOperand_ParseFail; 5736 5737 if (trySkipToken(AsmToken::Comma)) { 5738 StringRef Str; 5739 SMLoc Loc = getLoc(); 5740 if (!parseId(Str, "expected a format string") || 5741 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5742 return MatchOperand_ParseFail; 5743 } 5744 if (Dfmt == DFMT_UNDEF) { 5745 Error(Loc, "duplicate numeric format"); 5746 return MatchOperand_ParseFail; 5747 } else if (Nfmt == NFMT_UNDEF) { 5748 Error(Loc, "duplicate data format"); 5749 return MatchOperand_ParseFail; 5750 } 5751 } 5752 5753 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5754 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5755 5756 if (isGFX10Plus()) { 5757 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5758 if (Ufmt == UFMT_UNDEF) { 5759 Error(FormatLoc, "unsupported format"); 5760 return MatchOperand_ParseFail; 5761 } 5762 Format = Ufmt; 5763 } else { 5764 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5765 } 5766 5767 return MatchOperand_Success; 5768 } 5769 5770 OperandMatchResultTy 5771 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5772 SMLoc Loc, 5773 int64_t &Format) { 5774 using namespace llvm::AMDGPU::MTBUFFormat; 5775 5776 auto Id = getUnifiedFormat(FormatStr); 5777 if (Id == UFMT_UNDEF) 5778 return MatchOperand_NoMatch; 5779 5780 if (!isGFX10Plus()) { 5781 Error(Loc, "unified format is not supported on this GPU"); 5782 return MatchOperand_ParseFail; 5783 } 5784 5785 Format = Id; 5786 return MatchOperand_Success; 5787 } 5788 5789 OperandMatchResultTy 5790 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5791 using namespace llvm::AMDGPU::MTBUFFormat; 5792 SMLoc Loc = getLoc(); 5793 5794 if (!parseExpr(Format)) 5795 return MatchOperand_ParseFail; 5796 if (!isValidFormatEncoding(Format, getSTI())) { 5797 Error(Loc, "out of range format"); 5798 return MatchOperand_ParseFail; 5799 } 5800 5801 return MatchOperand_Success; 5802 } 5803 5804 OperandMatchResultTy 5805 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5806 using namespace llvm::AMDGPU::MTBUFFormat; 5807 5808 if (!trySkipId("format", AsmToken::Colon)) 5809 return MatchOperand_NoMatch; 5810 5811 if (trySkipToken(AsmToken::LBrac)) { 5812 StringRef FormatStr; 5813 SMLoc Loc = getLoc(); 5814 if (!parseId(FormatStr, "expected a format string")) 5815 return MatchOperand_ParseFail; 5816 5817 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5818 if (Res == MatchOperand_NoMatch) 5819 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5820 if (Res != MatchOperand_Success) 5821 return Res; 5822 5823 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5824 return MatchOperand_ParseFail; 5825 5826 return MatchOperand_Success; 5827 } 5828 5829 return parseNumericFormat(Format); 5830 } 5831 5832 OperandMatchResultTy 5833 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5834 using namespace llvm::AMDGPU::MTBUFFormat; 5835 5836 int64_t Format = getDefaultFormatEncoding(getSTI()); 5837 OperandMatchResultTy Res; 5838 SMLoc Loc = getLoc(); 5839 5840 // Parse legacy format syntax. 5841 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5842 if (Res == MatchOperand_ParseFail) 5843 return Res; 5844 5845 bool FormatFound = (Res == MatchOperand_Success); 5846 5847 Operands.push_back( 5848 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5849 5850 if (FormatFound) 5851 trySkipToken(AsmToken::Comma); 5852 5853 if (isToken(AsmToken::EndOfStatement)) { 5854 // We are expecting an soffset operand, 5855 // but let matcher handle the error. 5856 return MatchOperand_Success; 5857 } 5858 5859 // Parse soffset. 
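// Note that a symbolic or numeric format may also follow soffset, so both
// orderings (illustrative GFX10 syntax, assumed here) should be accepted:
//   tbuffer_load_format_x v0, off, s[0:3], format:[BUF_FMT_32_FLOAT], 0
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]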
5860 Res = parseRegOrImm(Operands); 5861 if (Res != MatchOperand_Success) 5862 return Res; 5863 5864 trySkipToken(AsmToken::Comma); 5865 5866 if (!FormatFound) { 5867 Res = parseSymbolicOrNumericFormat(Format); 5868 if (Res == MatchOperand_ParseFail) 5869 return Res; 5870 if (Res == MatchOperand_Success) { 5871 auto Size = Operands.size(); 5872 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5873 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5874 Op.setImm(Format); 5875 } 5876 return MatchOperand_Success; 5877 } 5878 5879 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5880 Error(getLoc(), "duplicate format"); 5881 return MatchOperand_ParseFail; 5882 } 5883 return MatchOperand_Success; 5884 } 5885 5886 //===----------------------------------------------------------------------===// 5887 // ds 5888 //===----------------------------------------------------------------------===// 5889 5890 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5891 const OperandVector &Operands) { 5892 OptionalImmIndexMap OptionalIdx; 5893 5894 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5895 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5896 5897 // Add the register arguments 5898 if (Op.isReg()) { 5899 Op.addRegOperands(Inst, 1); 5900 continue; 5901 } 5902 5903 // Handle optional arguments 5904 OptionalIdx[Op.getImmTy()] = i; 5905 } 5906 5907 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5908 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5909 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5910 5911 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5912 } 5913 5914 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5915 bool IsGdsHardcoded) { 5916 OptionalImmIndexMap OptionalIdx; 5917 5918 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5919 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5920 5921 // Add the register arguments 5922 if (Op.isReg()) { 5923 Op.addRegOperands(Inst, 1); 5924 continue; 5925 } 5926 5927 if (Op.isToken() && Op.getToken() == "gds") { 5928 IsGdsHardcoded = true; 5929 continue; 5930 } 5931 5932 // Handle optional arguments 5933 OptionalIdx[Op.getImmTy()] = i; 5934 } 5935 5936 AMDGPUOperand::ImmTy OffsetType = 5937 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5938 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5939 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5940 AMDGPUOperand::ImmTyOffset; 5941 5942 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5943 5944 if (!IsGdsHardcoded) { 5945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5946 } 5947 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5948 } 5949 5950 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5951 OptionalImmIndexMap OptionalIdx; 5952 5953 unsigned OperandIdx[4]; 5954 unsigned EnMask = 0; 5955 int SrcIdx = 0; 5956 5957 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5958 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5959 5960 // Add the register arguments 5961 if (Op.isReg()) { 5962 assert(SrcIdx < 4); 5963 OperandIdx[SrcIdx] = Inst.size(); 5964 Op.addRegOperands(Inst, 1); 5965 ++SrcIdx; 5966 continue; 5967 } 5968 5969 if (Op.isOff()) { 5970 assert(SrcIdx < 4); 5971 OperandIdx[SrcIdx] = Inst.size(); 5972 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5973 ++SrcIdx; 5974 continue; 5975 } 5976 5977 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5978 Op.addImmOperands(Inst, 1); 5979 continue; 5980 } 5981 5982 if (Op.isToken() && Op.getToken() == "done") 5983 continue; 5984 5985 // Handle optional arguments 5986 OptionalIdx[Op.getImmTy()] = i; 5987 } 5988 5989 assert(SrcIdx == 4); 5990 5991 bool Compr = false; 5992 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5993 Compr = true; 5994 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5995 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5996 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5997 } 5998 5999 for (auto i = 0; i < SrcIdx; ++i) { 6000 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6001 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6002 } 6003 } 6004 6005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6007 6008 Inst.addOperand(MCOperand::createImm(EnMask)); 6009 } 6010 6011 //===----------------------------------------------------------------------===// 6012 // s_waitcnt 6013 //===----------------------------------------------------------------------===// 6014 6015 static bool 6016 encodeCnt( 6017 const AMDGPU::IsaVersion ISA, 6018 int64_t &IntVal, 6019 int64_t CntVal, 6020 bool Saturate, 6021 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6022 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6023 { 6024 bool Failed = false; 6025 6026 IntVal = encode(ISA, IntVal, CntVal); 6027 if (CntVal != decode(ISA, IntVal)) { 6028 if (Saturate) { 6029 IntVal = encode(ISA, IntVal, -1); 6030 } else { 6031 Failed = true; 6032 } 6033 } 6034 return Failed; 6035 } 6036 6037 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6038 6039 SMLoc CntLoc = getLoc(); 6040 StringRef CntName = getTokenStr(); 6041 6042 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6043 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6044 return false; 6045 6046 int64_t CntVal; 6047 SMLoc ValLoc = getLoc(); 6048 if (!parseExpr(CntVal)) 6049 return false; 6050 6051 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6052 6053 bool Failed = true; 6054 bool Sat = CntName.endswith("_sat"); 6055 6056 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6057 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6058 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6059 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6060 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6061 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6062 } else { 6063 Error(CntLoc, "invalid counter name " + CntName); 6064 return false; 6065 } 6066 6067 if (Failed) { 6068 Error(ValLoc, "too large value for " + CntName); 6069 return false; 6070 } 6071 6072 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6073 return false; 6074 6075 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6076 if (isToken(AsmToken::EndOfStatement)) { 6077 Error(getLoc(), "expected a counter name"); 6078 return false; 6079 } 6080 } 6081 6082 return true; 6083 } 6084 6085 OperandMatchResultTy 6086 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6087 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6088 int64_t Waitcnt = getWaitcntBitMask(ISA); 6089 SMLoc S = getLoc(); 6090 6091 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6092 while (!isToken(AsmToken::EndOfStatement)) { 6093 if (!parseCnt(Waitcnt)) 6094 return MatchOperand_ParseFail; 6095 } 6096 } else { 6097 if (!parseExpr(Waitcnt)) 6098 return MatchOperand_ParseFail; 6099 } 6100 6101 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6102 return MatchOperand_Success; 6103 } 6104 6105 bool 6106 AMDGPUOperand::isSWaitCnt() const { 6107 return isImm(); 6108 } 6109 6110 //===----------------------------------------------------------------------===// 6111 // hwreg 6112 //===----------------------------------------------------------------------===// 6113 6114 bool 6115 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6116 OperandInfoTy &Offset, 6117 
OperandInfoTy &Width) { 6118 using namespace llvm::AMDGPU::Hwreg; 6119 6120 // The register may be specified by name or using a numeric code 6121 HwReg.Loc = getLoc(); 6122 if (isToken(AsmToken::Identifier) && 6123 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6124 HwReg.IsSymbolic = true; 6125 lex(); // skip register name 6126 } else if (!parseExpr(HwReg.Id, "a register name")) { 6127 return false; 6128 } 6129 6130 if (trySkipToken(AsmToken::RParen)) 6131 return true; 6132 6133 // parse optional params 6134 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6135 return false; 6136 6137 Offset.Loc = getLoc(); 6138 if (!parseExpr(Offset.Id)) 6139 return false; 6140 6141 if (!skipToken(AsmToken::Comma, "expected a comma")) 6142 return false; 6143 6144 Width.Loc = getLoc(); 6145 return parseExpr(Width.Id) && 6146 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6147 } 6148 6149 bool 6150 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6151 const OperandInfoTy &Offset, 6152 const OperandInfoTy &Width) { 6153 6154 using namespace llvm::AMDGPU::Hwreg; 6155 6156 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6157 Error(HwReg.Loc, 6158 "specified hardware register is not supported on this GPU"); 6159 return false; 6160 } 6161 if (!isValidHwreg(HwReg.Id)) { 6162 Error(HwReg.Loc, 6163 "invalid code of hardware register: only 6-bit values are legal"); 6164 return false; 6165 } 6166 if (!isValidHwregOffset(Offset.Id)) { 6167 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6168 return false; 6169 } 6170 if (!isValidHwregWidth(Width.Id)) { 6171 Error(Width.Loc, 6172 "invalid bitfield width: only values from 1 to 32 are legal"); 6173 return false; 6174 } 6175 return true; 6176 } 6177 6178 OperandMatchResultTy 6179 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6180 using namespace llvm::AMDGPU::Hwreg; 6181 6182 int64_t ImmVal = 0; 6183 SMLoc Loc = getLoc(); 6184 6185 if (trySkipId("hwreg", AsmToken::LParen)) { 6186 OperandInfoTy HwReg(ID_UNKNOWN_); 6187 OperandInfoTy Offset(OFFSET_DEFAULT_); 6188 OperandInfoTy Width(WIDTH_DEFAULT_); 6189 if (parseHwregBody(HwReg, Offset, Width) && 6190 validateHwreg(HwReg, Offset, Width)) { 6191 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6192 } else { 6193 return MatchOperand_ParseFail; 6194 } 6195 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6196 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6197 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6198 return MatchOperand_ParseFail; 6199 } 6200 } else { 6201 return MatchOperand_ParseFail; 6202 } 6203 6204 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6205 return MatchOperand_Success; 6206 } 6207 6208 bool AMDGPUOperand::isHwreg() const { 6209 return isImmTy(ImmTyHwreg); 6210 } 6211 6212 //===----------------------------------------------------------------------===// 6213 // sendmsg 6214 //===----------------------------------------------------------------------===// 6215 6216 bool 6217 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6218 OperandInfoTy &Op, 6219 OperandInfoTy &Stream) { 6220 using namespace llvm::AMDGPU::SendMsg; 6221 6222 Msg.Loc = getLoc(); 6223 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6224 Msg.IsSymbolic = true; 6225 lex(); // skip message name 6226 } else if (!parseExpr(Msg.Id, "a message name")) { 6227 return false; 6228 } 6229 6230 if (trySkipToken(AsmToken::Comma)) { 6231 Op.IsDefined = true; 
6232 Op.Loc = getLoc(); 6233 if (isToken(AsmToken::Identifier) && 6234 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6235 lex(); // skip operation name 6236 } else if (!parseExpr(Op.Id, "an operation name")) { 6237 return false; 6238 } 6239 6240 if (trySkipToken(AsmToken::Comma)) { 6241 Stream.IsDefined = true; 6242 Stream.Loc = getLoc(); 6243 if (!parseExpr(Stream.Id)) 6244 return false; 6245 } 6246 } 6247 6248 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6249 } 6250 6251 bool 6252 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6253 const OperandInfoTy &Op, 6254 const OperandInfoTy &Stream) { 6255 using namespace llvm::AMDGPU::SendMsg; 6256 6257 // Validation strictness depends on whether message is specified 6258 // in a symbolic or in a numeric form. In the latter case 6259 // only the encoding possibility is checked. 6260 bool Strict = Msg.IsSymbolic; 6261 6262 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6263 Error(Msg.Loc, "invalid message id"); 6264 return false; 6265 } 6266 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6267 if (Op.IsDefined) { 6268 Error(Op.Loc, "message does not support operations"); 6269 } else { 6270 Error(Msg.Loc, "missing message operation"); 6271 } 6272 return false; 6273 } 6274 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6275 Error(Op.Loc, "invalid operation id"); 6276 return false; 6277 } 6278 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6279 Error(Stream.Loc, "message operation does not support streams"); 6280 return false; 6281 } 6282 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6283 Error(Stream.Loc, "invalid message stream id"); 6284 return false; 6285 } 6286 return true; 6287 } 6288 6289 OperandMatchResultTy 6290 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6291 using namespace llvm::AMDGPU::SendMsg; 6292 6293 int64_t ImmVal = 0; 6294 SMLoc Loc = getLoc(); 6295 6296 if (trySkipId("sendmsg", AsmToken::LParen)) { 6297 OperandInfoTy Msg(ID_UNKNOWN_); 6298 OperandInfoTy Op(OP_NONE_); 6299 OperandInfoTy Stream(STREAM_ID_NONE_); 6300 if (parseSendMsgBody(Msg, Op, Stream) && 6301 validateSendMsg(Msg, Op, Stream)) { 6302 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6303 } else { 6304 return MatchOperand_ParseFail; 6305 } 6306 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6307 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6308 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6309 return MatchOperand_ParseFail; 6310 } 6311 } else { 6312 return MatchOperand_ParseFail; 6313 } 6314 6315 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6316 return MatchOperand_Success; 6317 } 6318 6319 bool AMDGPUOperand::isSendMsg() const { 6320 return isImmTy(ImmTySendMsg); 6321 } 6322 6323 //===----------------------------------------------------------------------===// 6324 // v_interp 6325 //===----------------------------------------------------------------------===// 6326 6327 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6328 StringRef Str; 6329 SMLoc S = getLoc(); 6330 6331 if (!parseId(Str)) 6332 return MatchOperand_NoMatch; 6333 6334 int Slot = StringSwitch<int>(Str) 6335 .Case("p10", 0) 6336 .Case("p20", 1) 6337 .Case("p0", 2) 6338 .Default(-1); 6339 6340 if (Slot == -1) { 6341 Error(S, "invalid interpolation slot"); 6342 return MatchOperand_ParseFail; 6343 } 6344 6345 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6346
AMDGPUOperand::ImmTyInterpSlot)); 6347 return MatchOperand_Success; 6348 } 6349 6350 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6351 StringRef Str; 6352 SMLoc S = getLoc(); 6353 6354 if (!parseId(Str)) 6355 return MatchOperand_NoMatch; 6356 6357 if (!Str.startswith("attr")) { 6358 Error(S, "invalid interpolation attribute"); 6359 return MatchOperand_ParseFail; 6360 } 6361 6362 StringRef Chan = Str.take_back(2); 6363 int AttrChan = StringSwitch<int>(Chan) 6364 .Case(".x", 0) 6365 .Case(".y", 1) 6366 .Case(".z", 2) 6367 .Case(".w", 3) 6368 .Default(-1); 6369 if (AttrChan == -1) { 6370 Error(S, "invalid or missing interpolation attribute channel"); 6371 return MatchOperand_ParseFail; 6372 } 6373 6374 Str = Str.drop_back(2).drop_front(4); 6375 6376 uint8_t Attr; 6377 if (Str.getAsInteger(10, Attr)) { 6378 Error(S, "invalid or missing interpolation attribute number"); 6379 return MatchOperand_ParseFail; 6380 } 6381 6382 if (Attr > 63) { 6383 Error(S, "out of bounds interpolation attribute number"); 6384 return MatchOperand_ParseFail; 6385 } 6386 6387 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6388 6389 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6390 AMDGPUOperand::ImmTyInterpAttr)); 6391 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6392 AMDGPUOperand::ImmTyAttrChan)); 6393 return MatchOperand_Success; 6394 } 6395 6396 //===----------------------------------------------------------------------===// 6397 // exp 6398 //===----------------------------------------------------------------------===// 6399 6400 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6401 using namespace llvm::AMDGPU::Exp; 6402 6403 StringRef Str; 6404 SMLoc S = getLoc(); 6405 6406 if (!parseId(Str)) 6407 return MatchOperand_NoMatch; 6408 6409 unsigned Id = getTgtId(Str); 6410 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6411 Error(S, (Id == ET_INVALID) ? 
6412 "invalid exp target" : 6413 "exp target is not supported on this GPU"); 6414 return MatchOperand_ParseFail; 6415 } 6416 6417 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6418 AMDGPUOperand::ImmTyExpTgt)); 6419 return MatchOperand_Success; 6420 } 6421 6422 //===----------------------------------------------------------------------===// 6423 // parser helpers 6424 //===----------------------------------------------------------------------===// 6425 6426 bool 6427 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6428 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6429 } 6430 6431 bool 6432 AMDGPUAsmParser::isId(const StringRef Id) const { 6433 return isId(getToken(), Id); 6434 } 6435 6436 bool 6437 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6438 return getTokenKind() == Kind; 6439 } 6440 6441 bool 6442 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6443 if (isId(Id)) { 6444 lex(); 6445 return true; 6446 } 6447 return false; 6448 } 6449 6450 bool 6451 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6452 if (isToken(AsmToken::Identifier)) { 6453 StringRef Tok = getTokenStr(); 6454 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6455 lex(); 6456 return true; 6457 } 6458 } 6459 return false; 6460 } 6461 6462 bool 6463 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6464 if (isId(Id) && peekToken().is(Kind)) { 6465 lex(); 6466 lex(); 6467 return true; 6468 } 6469 return false; 6470 } 6471 6472 bool 6473 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6474 if (isToken(Kind)) { 6475 lex(); 6476 return true; 6477 } 6478 return false; 6479 } 6480 6481 bool 6482 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6483 const StringRef ErrMsg) { 6484 if (!trySkipToken(Kind)) { 6485 Error(getLoc(), ErrMsg); 6486 return false; 6487 } 6488 return true; 6489 } 6490 6491 bool 6492 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6493 SMLoc S = getLoc(); 6494 6495 const MCExpr *Expr; 6496 if (Parser.parseExpression(Expr)) 6497 return false; 6498 6499 if (Expr->evaluateAsAbsolute(Imm)) 6500 return true; 6501 6502 if (Expected.empty()) { 6503 Error(S, "expected absolute expression"); 6504 } else { 6505 Error(S, Twine("expected ", Expected) + 6506 Twine(" or an absolute expression")); 6507 } 6508 return false; 6509 } 6510 6511 bool 6512 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6513 SMLoc S = getLoc(); 6514 6515 const MCExpr *Expr; 6516 if (Parser.parseExpression(Expr)) 6517 return false; 6518 6519 int64_t IntVal; 6520 if (Expr->evaluateAsAbsolute(IntVal)) { 6521 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6522 } else { 6523 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6524 } 6525 return true; 6526 } 6527 6528 bool 6529 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6530 if (isToken(AsmToken::String)) { 6531 Val = getToken().getStringContents(); 6532 lex(); 6533 return true; 6534 } else { 6535 Error(getLoc(), ErrMsg); 6536 return false; 6537 } 6538 } 6539 6540 bool 6541 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6542 if (isToken(AsmToken::Identifier)) { 6543 Val = getTokenStr(); 6544 lex(); 6545 return true; 6546 } else { 6547 if (!ErrMsg.empty()) 6548 Error(getLoc(), ErrMsg); 6549 return false; 6550 } 6551 } 6552 6553 AsmToken 6554 AMDGPUAsmParser::getToken() const { 6555 return Parser.getTok(); 6556 } 6557 6558 AsmToken 6559 
AMDGPUAsmParser::peekToken() { 6560 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6561 } 6562 6563 void 6564 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6565 auto TokCount = getLexer().peekTokens(Tokens); 6566 6567 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6568 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6569 } 6570 6571 AsmToken::TokenKind 6572 AMDGPUAsmParser::getTokenKind() const { 6573 return getLexer().getKind(); 6574 } 6575 6576 SMLoc 6577 AMDGPUAsmParser::getLoc() const { 6578 return getToken().getLoc(); 6579 } 6580 6581 StringRef 6582 AMDGPUAsmParser::getTokenStr() const { 6583 return getToken().getString(); 6584 } 6585 6586 void 6587 AMDGPUAsmParser::lex() { 6588 Parser.Lex(); 6589 } 6590 6591 SMLoc 6592 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6593 const OperandVector &Operands) const { 6594 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6595 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6596 if (Test(Op)) 6597 return Op.getStartLoc(); 6598 } 6599 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6600 } 6601 6602 SMLoc 6603 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6604 const OperandVector &Operands) const { 6605 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6606 return getOperandLoc(Test, Operands); 6607 } 6608 6609 SMLoc 6610 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6611 const OperandVector &Operands) const { 6612 auto Test = [=](const AMDGPUOperand& Op) { 6613 return Op.isRegKind() && Op.getReg() == Reg; 6614 }; 6615 return getOperandLoc(Test, Operands); 6616 } 6617 6618 SMLoc 6619 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6620 auto Test = [](const AMDGPUOperand& Op) { 6621 return Op.IsImmKindLiteral() || Op.isExpr(); 6622 }; 6623 return getOperandLoc(Test, Operands); 6624 } 6625 6626 SMLoc 6627 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6628 auto Test = [](const AMDGPUOperand& Op) { 6629 return Op.isImmKindConst(); 6630 }; 6631 return getOperandLoc(Test, Operands); 6632 } 6633 6634 //===----------------------------------------------------------------------===// 6635 // swizzle 6636 //===----------------------------------------------------------------------===// 6637 6638 LLVM_READNONE 6639 static unsigned 6640 encodeBitmaskPerm(const unsigned AndMask, 6641 const unsigned OrMask, 6642 const unsigned XorMask) { 6643 using namespace llvm::AMDGPU::Swizzle; 6644 6645 return BITMASK_PERM_ENC | 6646 (AndMask << BITMASK_AND_SHIFT) | 6647 (OrMask << BITMASK_OR_SHIFT) | 6648 (XorMask << BITMASK_XOR_SHIFT); 6649 } 6650 6651 bool 6652 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6653 const unsigned MinVal, 6654 const unsigned MaxVal, 6655 const StringRef ErrMsg, 6656 SMLoc &Loc) { 6657 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6658 return false; 6659 } 6660 Loc = getLoc(); 6661 if (!parseExpr(Op)) { 6662 return false; 6663 } 6664 if (Op < MinVal || Op > MaxVal) { 6665 Error(Loc, ErrMsg); 6666 return false; 6667 } 6668 6669 return true; 6670 } 6671 6672 bool 6673 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6674 const unsigned MinVal, 6675 const unsigned MaxVal, 6676 const StringRef ErrMsg) { 6677 SMLoc Loc; 6678 for (unsigned i = 0; i < OpNum; ++i) { 6679 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6680 return false; 6681 } 6682 6683 return true; 6684 } 6685 6686 bool 6687 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6688 using namespace llvm::AMDGPU::Swizzle; 6689 6690 int64_t Lane[LANE_NUM]; 6691 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6692 "expected a 2-bit lane id")) { 6693 Imm = QUAD_PERM_ENC; 6694 for (unsigned I = 0; I < LANE_NUM; ++I) { 6695 Imm |= Lane[I] << (LANE_SHIFT * I); 6696 } 6697 return true; 6698 } 6699 return false; 6700 } 6701 6702 bool 6703 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6704 using namespace llvm::AMDGPU::Swizzle; 6705 6706 SMLoc Loc; 6707 int64_t GroupSize; 6708 int64_t LaneIdx; 6709 6710 if (!parseSwizzleOperand(GroupSize, 6711 2, 32, 6712 "group size must be in the interval [2,32]", 6713 Loc)) { 6714 return false; 6715 } 6716 if (!isPowerOf2_64(GroupSize)) { 6717 Error(Loc, "group size must be a power of two"); 6718 return false; 6719 } 6720 if (parseSwizzleOperand(LaneIdx, 6721 0, GroupSize - 1, 6722 "lane id must be in the interval [0,group size - 1]", 6723 Loc)) { 6724 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6725 return true; 6726 } 6727 return false; 6728 } 6729 6730 bool 6731 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6732 using namespace llvm::AMDGPU::Swizzle; 6733 6734 SMLoc Loc; 6735 int64_t GroupSize; 6736 6737 if (!parseSwizzleOperand(GroupSize, 6738 2, 32, 6739 "group size must be in the interval [2,32]", 6740 Loc)) { 6741 return false; 6742 } 6743 if (!isPowerOf2_64(GroupSize)) { 6744 Error(Loc, "group size must be a power of two"); 6745 return false; 6746 } 6747 6748 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6749 return true; 6750 } 6751 6752 bool 6753 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6754 using namespace llvm::AMDGPU::Swizzle; 6755 6756 SMLoc Loc; 6757 int64_t GroupSize; 6758 6759 if (!parseSwizzleOperand(GroupSize, 6760 1, 16, 6761 "group size must be in the interval [1,16]", 6762 Loc)) { 6763 return false; 6764 } 6765 if (!isPowerOf2_64(GroupSize)) { 6766 Error(Loc, "group size must be a power of two"); 6767 return false; 6768 } 6769 6770 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6771 return true; 6772 } 6773 6774 bool 6775 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6776 using namespace llvm::AMDGPU::Swizzle; 6777 6778 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6779 return false; 6780 } 6781 6782 StringRef Ctl; 6783 SMLoc StrLoc = getLoc(); 6784 if (!parseString(Ctl)) { 6785 return false; 6786 } 6787 if (Ctl.size() != BITMASK_WIDTH) { 6788 Error(StrLoc, "expected a 5-character mask"); 6789 return false; 6790 } 6791 6792 unsigned AndMask = 0; 6793 unsigned OrMask = 0; 6794 unsigned XorMask = 0; 6795 6796 for (size_t i = 0; i < Ctl.size(); ++i) { 6797 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6798 switch(Ctl[i]) { 6799 default: 6800 Error(StrLoc, "invalid mask"); 6801 return false; 6802 case '0': 6803 break; 6804 case '1': 6805 OrMask |= Mask; 6806 break; 6807 case 'p': 6808 AndMask |= Mask; 6809 break; 6810 case 'i': 6811 AndMask |= Mask; 6812 XorMask |= Mask; 6813 break; 6814 } 6815 } 6816 6817 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6818 return true; 6819 } 6820 6821 bool 6822 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6823 6824 SMLoc OffsetLoc = getLoc(); 6825 6826 if (!parseExpr(Imm, "a swizzle macro")) { 6827 return false; 6828 } 6829 if (!isUInt<16>(Imm)) { 6830 Error(OffsetLoc, "expected a 16-bit offset"); 6831 return false; 6832 } 6833 return true; 6834 } 6835 6836 bool 6837 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6838 using namespace llvm::AMDGPU::Swizzle; 6839 6840 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6841 6842 SMLoc ModeLoc = getLoc(); 6843 bool Ok = false; 6844 6845 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6846 Ok = parseSwizzleQuadPerm(Imm); 6847 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6848 Ok = parseSwizzleBitmaskPerm(Imm); 6849 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6850 Ok = parseSwizzleBroadcast(Imm); 6851 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6852 Ok = parseSwizzleSwap(Imm); 6853 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6854 Ok = parseSwizzleReverse(Imm); 6855 } else { 6856 Error(ModeLoc, "expected a swizzle mode"); 6857 } 6858 6859 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6860 } 6861 6862 return false; 6863 } 6864 6865 OperandMatchResultTy 6866 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6867 SMLoc S = getLoc(); 6868 int64_t Imm = 0; 6869 6870 if (trySkipId("offset")) { 6871 6872 bool Ok = false; 6873 if (skipToken(AsmToken::Colon, "expected a colon")) { 6874 if (trySkipId("swizzle")) { 6875 Ok = parseSwizzleMacro(Imm); 6876 } else { 6877 Ok = parseSwizzleOffset(Imm); 6878 } 6879 } 6880 6881 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6882 6883 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6884 } else { 6885 // Swizzle "offset" operand is optional. 6886 // If it is omitted, try parsing other optional operands. 6887 return parseOptionalOpr(Operands); 6888 } 6889 } 6890 6891 bool 6892 AMDGPUOperand::isSwizzle() const { 6893 return isImmTy(ImmTySwizzle); 6894 } 6895 6896 //===----------------------------------------------------------------------===// 6897 // VGPR Index Mode 6898 //===----------------------------------------------------------------------===// 6899 6900 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6901 6902 using namespace llvm::AMDGPU::VGPRIndexMode; 6903 6904 if (trySkipToken(AsmToken::RParen)) { 6905 return OFF; 6906 } 6907 6908 int64_t Imm = 0; 6909 6910 while (true) { 6911 unsigned Mode = 0; 6912 SMLoc S = getLoc(); 6913 6914 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6915 if (trySkipId(IdSymbolic[ModeId])) { 6916 Mode = 1 << ModeId; 6917 break; 6918 } 6919 } 6920 6921 if (Mode == 0) { 6922 Error(S, (Imm == 0)? 
6923 "expected a VGPR index mode or a closing parenthesis" : 6924 "expected a VGPR index mode"); 6925 return UNDEF; 6926 } 6927 6928 if (Imm & Mode) { 6929 Error(S, "duplicate VGPR index mode"); 6930 return UNDEF; 6931 } 6932 Imm |= Mode; 6933 6934 if (trySkipToken(AsmToken::RParen)) 6935 break; 6936 if (!skipToken(AsmToken::Comma, 6937 "expected a comma or a closing parenthesis")) 6938 return UNDEF; 6939 } 6940 6941 return Imm; 6942 } 6943 6944 OperandMatchResultTy 6945 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6946 6947 using namespace llvm::AMDGPU::VGPRIndexMode; 6948 6949 int64_t Imm = 0; 6950 SMLoc S = getLoc(); 6951 6952 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6953 Imm = parseGPRIdxMacro(); 6954 if (Imm == UNDEF) 6955 return MatchOperand_ParseFail; 6956 } else { 6957 if (getParser().parseAbsoluteExpression(Imm)) 6958 return MatchOperand_ParseFail; 6959 if (Imm < 0 || !isUInt<4>(Imm)) { 6960 Error(S, "invalid immediate: only 4-bit values are legal"); 6961 return MatchOperand_ParseFail; 6962 } 6963 } 6964 6965 Operands.push_back( 6966 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6967 return MatchOperand_Success; 6968 } 6969 6970 bool AMDGPUOperand::isGPRIdxMode() const { 6971 return isImmTy(ImmTyGprIdxMode); 6972 } 6973 6974 //===----------------------------------------------------------------------===// 6975 // sopp branch targets 6976 //===----------------------------------------------------------------------===// 6977 6978 OperandMatchResultTy 6979 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6980 6981 // Make sure we are not parsing something 6982 // that looks like a label or an expression but is not. 6983 // This will improve error messages. 6984 if (isRegister() || isModifier()) 6985 return MatchOperand_NoMatch; 6986 6987 if (!parseExpr(Operands)) 6988 return MatchOperand_ParseFail; 6989 6990 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6991 assert(Opr.isImm() || Opr.isExpr()); 6992 SMLoc Loc = Opr.getStartLoc(); 6993 6994 // Currently we do not support arbitrary expressions as branch targets. 6995 // Only labels and absolute expressions are accepted. 
6996 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6997 Error(Loc, "expected an absolute expression or a label"); 6998 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6999 Error(Loc, "expected a 16-bit signed jump offset"); 7000 } 7001 7002 return MatchOperand_Success; 7003 } 7004 7005 //===----------------------------------------------------------------------===// 7006 // Boolean holding registers 7007 //===----------------------------------------------------------------------===// 7008 7009 OperandMatchResultTy 7010 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7011 return parseReg(Operands); 7012 } 7013 7014 //===----------------------------------------------------------------------===// 7015 // mubuf 7016 //===----------------------------------------------------------------------===// 7017 7018 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7019 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7020 } 7021 7022 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7023 const OperandVector &Operands, 7024 bool IsAtomic, 7025 bool IsLds) { 7026 bool IsLdsOpcode = IsLds; 7027 bool HasLdsModifier = false; 7028 OptionalImmIndexMap OptionalIdx; 7029 unsigned FirstOperandIdx = 1; 7030 bool IsAtomicReturn = false; 7031 7032 if (IsAtomic) { 7033 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7034 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7035 if (!Op.isCPol()) 7036 continue; 7037 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7038 break; 7039 } 7040 7041 if (!IsAtomicReturn) { 7042 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7043 if (NewOpc != -1) 7044 Inst.setOpcode(NewOpc); 7045 } 7046 7047 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7048 SIInstrFlags::IsAtomicRet; 7049 } 7050 7051 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7052 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7053 7054 // Add the register arguments 7055 if (Op.isReg()) { 7056 Op.addRegOperands(Inst, 1); 7057 // Insert a tied src for atomic return dst. 7058 // This cannot be postponed as subsequent calls to 7059 // addImmOperands rely on correct number of MC operands. 7060 if (IsAtomicReturn && i == FirstOperandIdx) 7061 Op.addRegOperands(Inst, 1); 7062 continue; 7063 } 7064 7065 // Handle the case where soffset is an immediate 7066 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7067 Op.addImmOperands(Inst, 1); 7068 continue; 7069 } 7070 7071 HasLdsModifier |= Op.isLDS(); 7072 7073 // Handle tokens like 'offen' which are sometimes hard-coded into the 7074 // asm string. There are no MCInst operands for these. 7075 if (Op.isToken()) { 7076 continue; 7077 } 7078 assert(Op.isImm()); 7079 7080 // Handle optional arguments 7081 OptionalIdx[Op.getImmTy()] = i; 7082 } 7083 7084 // This is a workaround for an llvm quirk which may result in an 7085 // incorrect instruction selection. Lds and non-lds versions of 7086 // MUBUF instructions are identical except that lds versions 7087 // have mandatory 'lds' modifier. However this modifier follows 7088 // optional modifiers and llvm asm matcher regards this 'lds' 7089 // modifier as an optional one. As a result, an lds version 7090 // of opcode may be selected even if it has no 'lds' modifier. 7091 if (IsLdsOpcode && !HasLdsModifier) { 7092 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7093 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7094 Inst.setOpcode(NoLdsOpcode); 7095 IsLdsOpcode = false; 7096 } 7097 } 7098 7099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7100 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7101 7102 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7103 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7104 } 7105 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7106 } 7107 7108 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7109 OptionalImmIndexMap OptionalIdx; 7110 7111 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7112 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7113 7114 // Add the register arguments 7115 if (Op.isReg()) { 7116 Op.addRegOperands(Inst, 1); 7117 continue; 7118 } 7119 7120 // Handle the case where soffset is an immediate 7121 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7122 Op.addImmOperands(Inst, 1); 7123 continue; 7124 } 7125 7126 // Handle tokens like 'offen' which are sometimes hard-coded into the 7127 // asm string. There are no MCInst operands for these. 7128 if (Op.isToken()) { 7129 continue; 7130 } 7131 assert(Op.isImm()); 7132 7133 // Handle optional arguments 7134 OptionalIdx[Op.getImmTy()] = i; 7135 } 7136 7137 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7138 AMDGPUOperand::ImmTyOffset); 7139 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7140 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7141 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7142 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7143 } 7144 7145 //===----------------------------------------------------------------------===// 7146 // mimg 7147 //===----------------------------------------------------------------------===// 7148 7149 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7150 bool IsAtomic) { 7151 unsigned I = 1; 7152 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7153 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7154 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7155 } 7156 7157 if (IsAtomic) { 7158 // Add src, same as dst 7159 assert(Desc.getNumDefs() == 1); 7160 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7161 } 7162 7163 OptionalImmIndexMap OptionalIdx; 7164 7165 for (unsigned E = Operands.size(); I != E; ++I) { 7166 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7167 7168 // Add the register arguments 7169 if (Op.isReg()) { 7170 Op.addRegOperands(Inst, 1); 7171 } else if (Op.isImmModifier()) { 7172 OptionalIdx[Op.getImmTy()] = I; 7173 } else if (!Op.isToken()) { 7174 llvm_unreachable("unexpected operand type"); 7175 } 7176 } 7177 7178 bool IsGFX10Plus = isGFX10Plus(); 7179 7180 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7181 if (IsGFX10Plus) 7182 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7183 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7184 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7185 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7186 if (IsGFX10Plus) 7187 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7188 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7189 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7190 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7191 if (!IsGFX10Plus) 7192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7194 } 7195 7196 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7197 cvtMIMG(Inst, Operands, true); 7198 } 7199 7200 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7201 OptionalImmIndexMap OptionalIdx; 7202 bool IsAtomicReturn = false; 7203 7204 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7205 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7206 if (!Op.isCPol()) 7207 continue; 7208 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7209 break; 7210 } 7211 7212 if (!IsAtomicReturn) { 7213 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7214 if (NewOpc != -1) 7215 Inst.setOpcode(NewOpc); 7216 } 7217 7218 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7219 SIInstrFlags::IsAtomicRet; 7220 7221 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7222 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7223 7224 // Add the register arguments 7225 if (Op.isReg()) { 7226 Op.addRegOperands(Inst, 1); 7227 if (IsAtomicReturn && i == 1) 7228 Op.addRegOperands(Inst, 1); 7229 continue; 7230 } 7231 7232 // Handle the case where soffset is an immediate 7233 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7234 Op.addImmOperands(Inst, 1); 7235 continue; 7236 } 7237 7238 // Handle tokens like 'offen' which are sometimes hard-coded into the 7239 // asm string. There are no MCInst operands for these. 7240 if (Op.isToken()) { 7241 continue; 7242 } 7243 assert(Op.isImm()); 7244 7245 // Handle optional arguments 7246 OptionalIdx[Op.getImmTy()] = i; 7247 } 7248 7249 if ((int)Inst.getNumOperands() <= 7250 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7251 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7253 } 7254 7255 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7256 const OperandVector &Operands) { 7257 for (unsigned I = 1; I < Operands.size(); ++I) { 7258 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7259 if (Operand.isReg()) 7260 Operand.addRegOperands(Inst, 1); 7261 } 7262 7263 Inst.addOperand(MCOperand::createImm(1)); // a16 7264 } 7265 7266 //===----------------------------------------------------------------------===// 7267 // smrd 7268 //===----------------------------------------------------------------------===// 7269 7270 bool AMDGPUOperand::isSMRDOffset8() const { 7271 return isImm() && isUInt<8>(getImm()); 7272 } 7273 7274 bool AMDGPUOperand::isSMEMOffset() const { 7275 return isImm(); // Offset range is checked later by validator. 7276 } 7277 7278 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7279 // 32-bit literals are only supported on CI and we only want to use them 7280 // when the offset is > 8-bits. 
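// As a rough sketch: an offset such as 0x10 fits the 8-bit form, while
// something like 0x1234 does not and therefore matches this 32-bit literal
// form (CI only).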
7281 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7282 } 7283 7284 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7285 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7286 } 7287 7288 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7289 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7290 } 7291 7292 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7293 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7294 } 7295 7296 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7297 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7298 } 7299 7300 //===----------------------------------------------------------------------===// 7301 // vop3 7302 //===----------------------------------------------------------------------===// 7303 7304 static bool ConvertOmodMul(int64_t &Mul) { 7305 if (Mul != 1 && Mul != 2 && Mul != 4) 7306 return false; 7307 7308 Mul >>= 1; 7309 return true; 7310 } 7311 7312 static bool ConvertOmodDiv(int64_t &Div) { 7313 if (Div == 1) { 7314 Div = 0; 7315 return true; 7316 } 7317 7318 if (Div == 2) { 7319 Div = 3; 7320 return true; 7321 } 7322 7323 return false; 7324 } 7325 7326 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7327 // This is intentional and ensures compatibility with sp3. 7328 // See bug 35397 for details. 7329 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7330 if (BoundCtrl == 0 || BoundCtrl == 1) { 7331 BoundCtrl = 1; 7332 return true; 7333 } 7334 return false; 7335 } 7336 7337 // Note: the order in this table matches the order of operands in AsmString. 7338 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7339 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7340 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7341 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7342 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7343 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7344 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7345 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7346 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7347 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7348 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7349 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7350 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7351 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7352 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7353 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7354 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7355 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7356 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7357 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7358 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7359 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7360 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7361 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7362 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7363 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7364 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7365 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7366 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7367 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7368 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 7369 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 7370 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 7371 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 7372 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 7373 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 7374 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 7375 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 7376 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 7377 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 7378 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 7379 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 7380 }; 7381 7382 void AMDGPUAsmParser::onBeginOfFile() { 7383 if (!getParser().getStreamer().getTargetStreamer() || 7384 getSTI().getTargetTriple().getArch() == Triple::r600) 7385 return; 7386 7387 if (!getTargetStreamer().getTargetID()) 7388 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString()); 7389 7390 if (isHsaAbiVersion3Or4(&getSTI())) 7391 getTargetStreamer().EmitDirectiveAMDGCNTarget(); 7392 } 7393 7394 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 7395 7396 OperandMatchResultTy res = parseOptionalOpr(Operands); 7397 7398 // This is a hack to enable hardcoded mandatory operands which follow 7399 // optional operands. 7400 // 7401 // Current design assumes that all operands after the first optional operand 7402 // are also optional. However implementation of some instructions violates 7403 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 7404 // 7405 // To alleviate this problem, we have to (implicitly) parse extra operands 7406 // to make sure autogenerated parser of custom operands never hit hardcoded 7407 // mandatory operands. 
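// Illustrative example of the problem (operand spelling is approximate):
// for a returning atomic written roughly as
//   flat_atomic_swap v0, v[1:2], v3 glc
// the trailing "glc" is hardcoded in the AsmString but comes after optional
// operands, so without the lookahead below the autogenerated parser could
// stop before consuming it.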
7408 7409 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7410 if (res != MatchOperand_Success || 7411 isToken(AsmToken::EndOfStatement)) 7412 break; 7413 7414 trySkipToken(AsmToken::Comma); 7415 res = parseOptionalOpr(Operands); 7416 } 7417 7418 return res; 7419 } 7420 7421 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7422 OperandMatchResultTy res; 7423 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7424 // try to parse any optional operand here 7425 if (Op.IsBit) { 7426 res = parseNamedBit(Op.Name, Operands, Op.Type); 7427 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7428 res = parseOModOperand(Operands); 7429 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7430 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7431 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7432 res = parseSDWASel(Operands, Op.Name, Op.Type); 7433 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7434 res = parseSDWADstUnused(Operands); 7435 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7436 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7437 Op.Type == AMDGPUOperand::ImmTyNegLo || 7438 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7439 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7440 Op.ConvertResult); 7441 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7442 res = parseDim(Operands); 7443 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7444 res = parseCPol(Operands); 7445 } else { 7446 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7447 } 7448 if (res != MatchOperand_NoMatch) { 7449 return res; 7450 } 7451 } 7452 return MatchOperand_NoMatch; 7453 } 7454 7455 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7456 StringRef Name = getTokenStr(); 7457 if (Name == "mul") { 7458 return parseIntWithPrefix("mul", Operands, 7459 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7460 } 7461 7462 if (Name == "div") { 7463 return parseIntWithPrefix("div", Operands, 7464 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7465 } 7466 7467 return MatchOperand_NoMatch; 7468 } 7469 7470 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7471 cvtVOP3P(Inst, Operands); 7472 7473 int Opc = Inst.getOpcode(); 7474 7475 int SrcNum; 7476 const int Ops[] = { AMDGPU::OpName::src0, 7477 AMDGPU::OpName::src1, 7478 AMDGPU::OpName::src2 }; 7479 for (SrcNum = 0; 7480 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7481 ++SrcNum); 7482 assert(SrcNum > 0); 7483 7484 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7485 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7486 7487 if ((OpSel & (1 << SrcNum)) != 0) { 7488 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7489 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7490 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7491 } 7492 } 7493 7494 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7495 // 1. This operand is input modifiers 7496 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7497 // 2. This is not last operand 7498 && Desc.NumOperands > (OpNum + 1) 7499 // 3. Next operand is register class 7500 && Desc.OpInfo[OpNum + 1].RegClass != -1 7501 // 4. 
Next register is not tied to any other operand 7502 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7503 } 7504 7505 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7506 { 7507 OptionalImmIndexMap OptionalIdx; 7508 unsigned Opc = Inst.getOpcode(); 7509 7510 unsigned I = 1; 7511 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7512 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7513 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7514 } 7515 7516 for (unsigned E = Operands.size(); I != E; ++I) { 7517 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7518 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7519 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7520 } else if (Op.isInterpSlot() || 7521 Op.isInterpAttr() || 7522 Op.isAttrChan()) { 7523 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7524 } else if (Op.isImmModifier()) { 7525 OptionalIdx[Op.getImmTy()] = I; 7526 } else { 7527 llvm_unreachable("unhandled operand type"); 7528 } 7529 } 7530 7531 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7532 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7533 } 7534 7535 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7536 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7537 } 7538 7539 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7540 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7541 } 7542 } 7543 7544 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7545 OptionalImmIndexMap &OptionalIdx) { 7546 unsigned Opc = Inst.getOpcode(); 7547 7548 unsigned I = 1; 7549 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7550 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7551 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7552 } 7553 7554 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7555 // This instruction has src modifiers 7556 for (unsigned E = Operands.size(); I != E; ++I) { 7557 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7558 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7559 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7560 } else if (Op.isImmModifier()) { 7561 OptionalIdx[Op.getImmTy()] = I; 7562 } else if (Op.isRegOrImm()) { 7563 Op.addRegOrImmOperands(Inst, 1); 7564 } else { 7565 llvm_unreachable("unhandled operand type"); 7566 } 7567 } 7568 } else { 7569 // No src modifiers 7570 for (unsigned E = Operands.size(); I != E; ++I) { 7571 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7572 if (Op.isMod()) { 7573 OptionalIdx[Op.getImmTy()] = I; 7574 } else { 7575 Op.addRegOrImmOperands(Inst, 1); 7576 } 7577 } 7578 } 7579 7580 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7581 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7582 } 7583 7584 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7585 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7586 } 7587 7588 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7589 // it has src2 register operand that is tied to dst operand 7590 // we don't allow modifiers for this operand in assembler so src2_modifiers 7591 // should be 0. 
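// A minimal sketch of the resulting operand list (assuming the usual VOP3
// layout) for "v_mac_f32_e64 v0, v1, v2":
//   vdst=v0, src0_modifiers, src0=v1, src1_modifiers, src1=v2,
//   src2_modifiers=0 (inserted here), src2=v0 (copy of vdst), clamp, omod.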
7592 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7593 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7594 Opc == AMDGPU::V_MAC_F32_e64_vi || 7595 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7596 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7597 Opc == AMDGPU::V_MAC_F16_e64_vi || 7598 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7599 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7600 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7601 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7602 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7603 auto it = Inst.begin(); 7604 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7605 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7606 ++it; 7607 // Copy the operand to ensure it's not invalidated when Inst grows. 7608 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7609 } 7610 } 7611 7612 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7613 OptionalImmIndexMap OptionalIdx; 7614 cvtVOP3(Inst, Operands, OptionalIdx); 7615 } 7616 7617 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7618 OptionalImmIndexMap &OptIdx) { 7619 const int Opc = Inst.getOpcode(); 7620 const MCInstrDesc &Desc = MII.get(Opc); 7621 7622 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7623 7624 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7625 assert(!IsPacked); 7626 Inst.addOperand(Inst.getOperand(0)); 7627 } 7628 7629 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7630 // instruction, and then figure out where to actually put the modifiers 7631 7632 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7633 if (OpSelIdx != -1) { 7634 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7635 } 7636 7637 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7638 if (OpSelHiIdx != -1) { 7639 int DefaultVal = IsPacked ? 
-1 : 0; 7640 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7641 DefaultVal); 7642 } 7643 7644 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7645 if (NegLoIdx != -1) { 7646 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7647 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7648 } 7649 7650 const int Ops[] = { AMDGPU::OpName::src0, 7651 AMDGPU::OpName::src1, 7652 AMDGPU::OpName::src2 }; 7653 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7654 AMDGPU::OpName::src1_modifiers, 7655 AMDGPU::OpName::src2_modifiers }; 7656 7657 unsigned OpSel = 0; 7658 unsigned OpSelHi = 0; 7659 unsigned NegLo = 0; 7660 unsigned NegHi = 0; 7661 7662 if (OpSelIdx != -1) 7663 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7664 7665 if (OpSelHiIdx != -1) 7666 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7667 7668 if (NegLoIdx != -1) { 7669 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7670 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7671 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7672 } 7673 7674 for (int J = 0; J < 3; ++J) { 7675 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7676 if (OpIdx == -1) 7677 break; 7678 7679 uint32_t ModVal = 0; 7680 7681 if ((OpSel & (1 << J)) != 0) 7682 ModVal |= SISrcMods::OP_SEL_0; 7683 7684 if ((OpSelHi & (1 << J)) != 0) 7685 ModVal |= SISrcMods::OP_SEL_1; 7686 7687 if ((NegLo & (1 << J)) != 0) 7688 ModVal |= SISrcMods::NEG; 7689 7690 if ((NegHi & (1 << J)) != 0) 7691 ModVal |= SISrcMods::NEG_HI; 7692 7693 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7694 7695 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7696 } 7697 } 7698 7699 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7700 OptionalImmIndexMap OptIdx; 7701 cvtVOP3(Inst, Operands, OptIdx); 7702 cvtVOP3P(Inst, Operands, OptIdx); 7703 } 7704 7705 //===----------------------------------------------------------------------===// 7706 // dpp 7707 //===----------------------------------------------------------------------===// 7708 7709 bool AMDGPUOperand::isDPP8() const { 7710 return isImmTy(ImmTyDPP8); 7711 } 7712 7713 bool AMDGPUOperand::isDPPCtrl() const { 7714 using namespace AMDGPU::DPP; 7715 7716 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7717 if (result) { 7718 int64_t Imm = getImm(); 7719 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7720 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7721 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7722 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7723 (Imm == DppCtrl::WAVE_SHL1) || 7724 (Imm == DppCtrl::WAVE_ROL1) || 7725 (Imm == DppCtrl::WAVE_SHR1) || 7726 (Imm == DppCtrl::WAVE_ROR1) || 7727 (Imm == DppCtrl::ROW_MIRROR) || 7728 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7729 (Imm == DppCtrl::BCAST15) || 7730 (Imm == DppCtrl::BCAST31) || 7731 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7732 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7733 } 7734 return false; 7735 } 7736 7737 //===----------------------------------------------------------------------===// 7738 // mAI 7739 //===----------------------------------------------------------------------===// 7740 7741 bool AMDGPUOperand::isBLGP() const { 7742 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7743 } 7744 7745 bool 
AMDGPUOperand::isCBSZ() const { 7746 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7747 } 7748 7749 bool AMDGPUOperand::isABID() const { 7750 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7751 } 7752 7753 bool AMDGPUOperand::isS16Imm() const { 7754 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7755 } 7756 7757 bool AMDGPUOperand::isU16Imm() const { 7758 return isImm() && isUInt<16>(getImm()); 7759 } 7760 7761 //===----------------------------------------------------------------------===// 7762 // dim 7763 //===----------------------------------------------------------------------===// 7764 7765 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7766 // We want to allow "dim:1D" etc., 7767 // but the initial 1 is tokenized as an integer. 7768 std::string Token; 7769 if (isToken(AsmToken::Integer)) { 7770 SMLoc Loc = getToken().getEndLoc(); 7771 Token = std::string(getTokenStr()); 7772 lex(); 7773 if (getLoc() != Loc) 7774 return false; 7775 } 7776 7777 StringRef Suffix; 7778 if (!parseId(Suffix)) 7779 return false; 7780 Token += Suffix; 7781 7782 StringRef DimId = Token; 7783 if (DimId.startswith("SQ_RSRC_IMG_")) 7784 DimId = DimId.drop_front(12); 7785 7786 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7787 if (!DimInfo) 7788 return false; 7789 7790 Encoding = DimInfo->Encoding; 7791 return true; 7792 } 7793 7794 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7795 if (!isGFX10Plus()) 7796 return MatchOperand_NoMatch; 7797 7798 SMLoc S = getLoc(); 7799 7800 if (!trySkipId("dim", AsmToken::Colon)) 7801 return MatchOperand_NoMatch; 7802 7803 unsigned Encoding; 7804 SMLoc Loc = getLoc(); 7805 if (!parseDimId(Encoding)) { 7806 Error(Loc, "invalid dim value"); 7807 return MatchOperand_ParseFail; 7808 } 7809 7810 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7811 AMDGPUOperand::ImmTyDim)); 7812 return MatchOperand_Success; 7813 } 7814 7815 //===----------------------------------------------------------------------===// 7816 // dpp 7817 //===----------------------------------------------------------------------===// 7818 7819 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7820 SMLoc S = getLoc(); 7821 7822 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7823 return MatchOperand_NoMatch; 7824 7825 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7826 7827 int64_t Sels[8]; 7828 7829 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7830 return MatchOperand_ParseFail; 7831 7832 for (size_t i = 0; i < 8; ++i) { 7833 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7834 return MatchOperand_ParseFail; 7835 7836 SMLoc Loc = getLoc(); 7837 if (getParser().parseAbsoluteExpression(Sels[i])) 7838 return MatchOperand_ParseFail; 7839 if (0 > Sels[i] || 7 < Sels[i]) { 7840 Error(Loc, "expected a 3-bit value"); 7841 return MatchOperand_ParseFail; 7842 } 7843 } 7844 7845 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7846 return MatchOperand_ParseFail; 7847 7848 unsigned DPP8 = 0; 7849 for (size_t i = 0; i < 8; ++i) 7850 DPP8 |= (Sels[i] << (i * 3)); 7851 7852 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7853 return MatchOperand_Success; 7854 } 7855 7856 bool 7857 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7858 const OperandVector &Operands) { 7859 if (Ctrl == "row_newbcast") 7860 return isGFX90A(); 7861 7862 if (Ctrl == "row_share" || 7863 Ctrl 
== "row_xmask") 7864 return isGFX10Plus(); 7865 7866 if (Ctrl == "wave_shl" || 7867 Ctrl == "wave_shr" || 7868 Ctrl == "wave_rol" || 7869 Ctrl == "wave_ror" || 7870 Ctrl == "row_bcast") 7871 return isVI() || isGFX9(); 7872 7873 return Ctrl == "row_mirror" || 7874 Ctrl == "row_half_mirror" || 7875 Ctrl == "quad_perm" || 7876 Ctrl == "row_shl" || 7877 Ctrl == "row_shr" || 7878 Ctrl == "row_ror"; 7879 } 7880 7881 int64_t 7882 AMDGPUAsmParser::parseDPPCtrlPerm() { 7883 // quad_perm:[%d,%d,%d,%d] 7884 7885 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7886 return -1; 7887 7888 int64_t Val = 0; 7889 for (int i = 0; i < 4; ++i) { 7890 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7891 return -1; 7892 7893 int64_t Temp; 7894 SMLoc Loc = getLoc(); 7895 if (getParser().parseAbsoluteExpression(Temp)) 7896 return -1; 7897 if (Temp < 0 || Temp > 3) { 7898 Error(Loc, "expected a 2-bit value"); 7899 return -1; 7900 } 7901 7902 Val += (Temp << i * 2); 7903 } 7904 7905 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7906 return -1; 7907 7908 return Val; 7909 } 7910 7911 int64_t 7912 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7913 using namespace AMDGPU::DPP; 7914 7915 // sel:%d 7916 7917 int64_t Val; 7918 SMLoc Loc = getLoc(); 7919 7920 if (getParser().parseAbsoluteExpression(Val)) 7921 return -1; 7922 7923 struct DppCtrlCheck { 7924 int64_t Ctrl; 7925 int Lo; 7926 int Hi; 7927 }; 7928 7929 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7930 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7931 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7932 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7933 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7934 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7935 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7936 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7937 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7938 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7939 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7940 .Default({-1, 0, 0}); 7941 7942 bool Valid; 7943 if (Check.Ctrl == -1) { 7944 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7945 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7946 } else { 7947 Valid = Check.Lo <= Val && Val <= Check.Hi; 7948 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7949 } 7950 7951 if (!Valid) { 7952 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7953 return -1; 7954 } 7955 7956 return Val; 7957 } 7958 7959 OperandMatchResultTy 7960 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7961 using namespace AMDGPU::DPP; 7962 7963 if (!isToken(AsmToken::Identifier) || 7964 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7965 return MatchOperand_NoMatch; 7966 7967 SMLoc S = getLoc(); 7968 int64_t Val = -1; 7969 StringRef Ctrl; 7970 7971 parseId(Ctrl); 7972 7973 if (Ctrl == "row_mirror") { 7974 Val = DppCtrl::ROW_MIRROR; 7975 } else if (Ctrl == "row_half_mirror") { 7976 Val = DppCtrl::ROW_HALF_MIRROR; 7977 } else { 7978 if (skipToken(AsmToken::Colon, "expected a colon")) { 7979 if (Ctrl == "quad_perm") { 7980 Val = parseDPPCtrlPerm(); 7981 } else { 7982 Val = parseDPPCtrlSel(Ctrl); 7983 } 7984 } 7985 } 7986 7987 if (Val == -1) 7988 return MatchOperand_ParseFail; 7989 7990 Operands.push_back( 7991 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7992 return MatchOperand_Success; 7993 } 7994 7995 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7996 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7997 } 7998 7999 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8000 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8001 } 8002 8003 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8004 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8005 } 8006 8007 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8008 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8009 } 8010 8011 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8012 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8013 } 8014 8015 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8016 OptionalImmIndexMap OptionalIdx; 8017 8018 unsigned Opc = Inst.getOpcode(); 8019 bool HasModifiers = 8020 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8021 unsigned I = 1; 8022 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8023 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8024 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8025 } 8026 8027 int Fi = 0; 8028 for (unsigned E = Operands.size(); I != E; ++I) { 8029 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8030 MCOI::TIED_TO); 8031 if (TiedTo != -1) { 8032 assert((unsigned)TiedTo < Inst.getNumOperands()); 8033 // handle tied old or src2 for MAC instructions 8034 Inst.addOperand(Inst.getOperand(TiedTo)); 8035 } 8036 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8037 // Add the register arguments 8038 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8039 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8040 // Skip it. 
8041 continue; 8042 } 8043 8044 if (IsDPP8) { 8045 if (Op.isDPP8()) { 8046 Op.addImmOperands(Inst, 1); 8047 } else if (HasModifiers && 8048 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8049 Op.addRegWithFPInputModsOperands(Inst, 2); 8050 } else if (Op.isFI()) { 8051 Fi = Op.getImm(); 8052 } else if (Op.isReg()) { 8053 Op.addRegOperands(Inst, 1); 8054 } else { 8055 llvm_unreachable("Invalid operand type"); 8056 } 8057 } else { 8058 if (HasModifiers && 8059 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8060 Op.addRegWithFPInputModsOperands(Inst, 2); 8061 } else if (Op.isReg()) { 8062 Op.addRegOperands(Inst, 1); 8063 } else if (Op.isDPPCtrl()) { 8064 Op.addImmOperands(Inst, 1); 8065 } else if (Op.isImm()) { 8066 // Handle optional arguments 8067 OptionalIdx[Op.getImmTy()] = I; 8068 } else { 8069 llvm_unreachable("Invalid operand type"); 8070 } 8071 } 8072 } 8073 8074 if (IsDPP8) { 8075 using namespace llvm::AMDGPU::DPP; 8076 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8077 } else { 8078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8080 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8081 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8082 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8083 } 8084 } 8085 } 8086 8087 //===----------------------------------------------------------------------===// 8088 // sdwa 8089 //===----------------------------------------------------------------------===// 8090 8091 OperandMatchResultTy 8092 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8093 AMDGPUOperand::ImmTy Type) { 8094 using namespace llvm::AMDGPU::SDWA; 8095 8096 SMLoc S = getLoc(); 8097 StringRef Value; 8098 OperandMatchResultTy res; 8099 8100 SMLoc StringLoc; 8101 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8102 if (res != MatchOperand_Success) { 8103 return res; 8104 } 8105 8106 int64_t Int; 8107 Int = StringSwitch<int64_t>(Value) 8108 .Case("BYTE_0", SdwaSel::BYTE_0) 8109 .Case("BYTE_1", SdwaSel::BYTE_1) 8110 .Case("BYTE_2", SdwaSel::BYTE_2) 8111 .Case("BYTE_3", SdwaSel::BYTE_3) 8112 .Case("WORD_0", SdwaSel::WORD_0) 8113 .Case("WORD_1", SdwaSel::WORD_1) 8114 .Case("DWORD", SdwaSel::DWORD) 8115 .Default(0xffffffff); 8116 8117 if (Int == 0xffffffff) { 8118 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8119 return MatchOperand_ParseFail; 8120 } 8121 8122 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8123 return MatchOperand_Success; 8124 } 8125 8126 OperandMatchResultTy 8127 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8128 using namespace llvm::AMDGPU::SDWA; 8129 8130 SMLoc S = getLoc(); 8131 StringRef Value; 8132 OperandMatchResultTy res; 8133 8134 SMLoc StringLoc; 8135 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8136 if (res != MatchOperand_Success) { 8137 return res; 8138 } 8139 8140 int64_t Int; 8141 Int = StringSwitch<int64_t>(Value) 8142 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8143 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8144 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8145 .Default(0xffffffff); 8146 8147 if (Int == 0xffffffff) { 8148 Error(StringLoc, "invalid dst_unused value"); 8149 return MatchOperand_ParseFail; 8150 } 8151 8152 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8153 return MatchOperand_Success; 8154 } 8155 8156 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8157 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8158 } 8159 8160 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8161 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8162 } 8163 8164 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8165 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8166 } 8167 8168 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8169 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8170 } 8171 8172 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8173 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8174 } 8175 8176 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8177 uint64_t BasicInstType, 8178 bool SkipDstVcc, 8179 bool SkipSrcVcc) { 8180 using namespace llvm::AMDGPU::SDWA; 8181 8182 OptionalImmIndexMap OptionalIdx; 8183 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8184 bool SkippedVcc = false; 8185 8186 unsigned I = 1; 8187 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8188 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8189 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8190 } 8191 8192 for (unsigned E = Operands.size(); I != E; ++I) { 8193 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8194 if (SkipVcc && !SkippedVcc && Op.isReg() && 8195 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8196 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8197 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8198 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8199 // Skip VCC only if we didn't skip it on previous iteration. 8200 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
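// Worked example of the counts tested below, assuming the usual VOP2 sdwa
// layout: once only the dst register has been added, Inst has 1 operand, so
// a "vcc" seen at that point is the carry-out (SkipDstVcc); after dst plus
// src0_modifiers/src0 and src1_modifiers/src1 it has 5 operands, so a "vcc"
// there is the carry-in (SkipSrcVcc).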
8201 if (BasicInstType == SIInstrFlags::VOP2 &&
8202 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8203 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8204 SkippedVcc = true;
8205 continue;
8206 } else if (BasicInstType == SIInstrFlags::VOPC &&
8207 Inst.getNumOperands() == 0) {
8208 SkippedVcc = true;
8209 continue;
8210 }
8211 }
8212 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8213 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8214 } else if (Op.isImm()) {
8215 // Handle optional arguments
8216 OptionalIdx[Op.getImmTy()] = I;
8217 } else {
8218 llvm_unreachable("Invalid operand type");
8219 }
8220 SkippedVcc = false;
8221 }
8222
8223 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8224 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8225 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8226 // v_nop_sdwa_vi/gfx9/gfx10 have no optional sdwa arguments
8227 switch (BasicInstType) {
8228 case SIInstrFlags::VOP1:
8229 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8230 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8231 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8232 }
8233 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8234 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8235 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8236 break;
8237
8238 case SIInstrFlags::VOP2:
8239 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8240 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8241 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8242 }
8243 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8245 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8247 break;
8248
8249 case SIInstrFlags::VOPC:
8250 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8251 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8252 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8253 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8254 break;
8255
8256 default:
8257 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8258 }
8259 }
8260
8261 // Special case v_mac_{f16, f32}:
8262 // it has a src2 register operand that is tied to the dst operand.
8263 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8264 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8265 auto it = Inst.begin();
8266 std::advance(
8267 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8268 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8269 }
8270 }
8271
8272 //===----------------------------------------------------------------------===//
8273 // mAI
8274 //===----------------------------------------------------------------------===//
8275
8276 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8277 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8278 }
8279
8280 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8281 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8282 }
8283
8284 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8285 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8286 }
8287
8288 /// Force static initialization.
8289 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8290 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8291 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8292 }
8293
8294 #define GET_REGISTER_MATCHER
8295 #define GET_MATCHER_IMPLEMENTATION
8296 #define GET_MNEMONIC_SPELL_CHECKER
8297 #define GET_MNEMONIC_CHECKER
8298 #include "AMDGPUGenAsmMatcher.inc"
8299
8300 // This function should be defined after the auto-generated include so that
8301 // the MatchClassKind enum is defined.
8302 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8303 unsigned Kind) {
8304 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8305 // But MatchInstructionImpl() expects a token and fails to validate the
8306 // operand. This method checks whether we were given an immediate operand but
8307 // expected the corresponding token.
8308 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8309 switch (Kind) {
8310 case MCK_addr64:
8311 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8312 case MCK_gds:
8313 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8314 case MCK_lds:
8315 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8316 case MCK_idxen:
8317 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8318 case MCK_offen:
8319 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8320 case MCK_SSrcB32:
8321 // When operands have expression values, they will return true for isToken,
8322 // because it is not possible to distinguish between a token and an
8323 // expression at parse time. MatchInstructionImpl() will always try to
8324 // match an operand as a token when isToken returns true, and when the
8325 // name of the expression is not a valid token the match will fail,
8326 // so we need to handle it here.
8327 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8328 case MCK_SSrcF32:
8329 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8330 case MCK_SoppBrTarget:
8331 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8332 case MCK_VReg32OrOff:
8333 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8334 case MCK_InterpSlot:
8335 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8336 case MCK_Attr: 8337 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8338 case MCK_AttrChan: 8339 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8340 case MCK_ImmSMEMOffset: 8341 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8342 case MCK_SReg_64: 8343 case MCK_SReg_64_XEXEC: 8344 // Null is defined as a 32-bit register but 8345 // it should also be enabled with 64-bit operands. 8346 // The following code enables it for SReg_64 operands 8347 // used as source and destination. Remaining source 8348 // operands are handled in isInlinableImm. 8349 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8350 default: 8351 return Match_InvalidOperand; 8352 } 8353 } 8354 8355 //===----------------------------------------------------------------------===// 8356 // endpgm 8357 //===----------------------------------------------------------------------===// 8358 8359 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8360 SMLoc S = getLoc(); 8361 int64_t Imm = 0; 8362 8363 if (!parseExpr(Imm)) { 8364 // The operand is optional, if not present default to 0 8365 Imm = 0; 8366 } 8367 8368 if (!isUInt<16>(Imm)) { 8369 Error(S, "expected a 16-bit value"); 8370 return MatchOperand_ParseFail; 8371 } 8372 8373 Operands.push_back( 8374 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8375 return MatchOperand_Success; 8376 } 8377 8378 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8379