//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool isImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  // TODO: Possibly make subtargetHasRegister const.
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
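      //
      // The block below predefines the ISA version symbols
      // (.amdgcn.gfx_generation_* under HSA ABI V3/V4, .option.machine_version_*
      // otherwise) and the GPR usage tracking symbols so that later directives
      // and assembly expressions can reference them.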
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1333 1334 bool isSI() const { 1335 return AMDGPU::isSI(getSTI()); 1336 } 1337 1338 bool isCI() const { 1339 return AMDGPU::isCI(getSTI()); 1340 } 1341 1342 bool isVI() const { 1343 return AMDGPU::isVI(getSTI()); 1344 } 1345 1346 bool isGFX9() const { 1347 return AMDGPU::isGFX9(getSTI()); 1348 } 1349 1350 bool isGFX90A() const { 1351 return AMDGPU::isGFX90A(getSTI()); 1352 } 1353 1354 bool isGFX9Plus() const { 1355 return AMDGPU::isGFX9Plus(getSTI()); 1356 } 1357 1358 bool isGFX10() const { 1359 return AMDGPU::isGFX10(getSTI()); 1360 } 1361 1362 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1363 1364 bool isGFX10_BEncoding() const { 1365 return AMDGPU::isGFX10_BEncoding(getSTI()); 1366 } 1367 1368 bool hasInv2PiInlineImm() const { 1369 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1370 } 1371 1372 bool hasFlatOffsets() const { 1373 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1374 } 1375 1376 bool hasArchitectedFlatScratch() const { 1377 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1378 } 1379 1380 bool hasSGPR102_SGPR103() const { 1381 return !isVI() && !isGFX9(); 1382 } 1383 1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1385 1386 bool hasIntClamp() const { 1387 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1388 } 1389 1390 AMDGPUTargetStreamer &getTargetStreamer() { 1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1392 return static_cast<AMDGPUTargetStreamer &>(TS); 1393 } 1394 1395 const MCRegisterInfo *getMRI() const { 1396 // We need this const_cast because for some reason getContext() is not const 1397 // in MCAsmParser. 
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1399 } 1400 1401 const MCInstrInfo *getMII() const { 1402 return &MII; 1403 } 1404 1405 const FeatureBitset &getFeatureBits() const { 1406 return getSTI().getFeatureBits(); 1407 } 1408 1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1412 1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1415 bool isForcedDPP() const { return ForcedDPP; } 1416 bool isForcedSDWA() const { return ForcedSDWA; } 1417 ArrayRef<unsigned> getMatchedVariants() const; 1418 StringRef getMatchedVariantName() const; 1419 1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1422 bool RestoreOnFailure); 1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1425 SMLoc &EndLoc) override; 1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1428 unsigned Kind) override; 1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1430 OperandVector &Operands, MCStreamer &Out, 1431 uint64_t &ErrorInfo, 1432 bool MatchingInlineAsm) override; 1433 bool ParseDirective(AsmToken DirectiveID) override; 1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1435 OperandMode Mode = OperandMode_Default); 1436 StringRef parseMnemonicSuffix(StringRef Name); 1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1438 SMLoc NameLoc, OperandVector &Operands) override; 1439 //bool ProcessInstruction(MCInst &Inst); 1440 1441 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1442 1443 OperandMatchResultTy 1444 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t &) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseOperandArrayWithPrefix(const char *Prefix, 1450 OperandVector &Operands, 1451 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1452 bool (*ConvertResult)(int64_t&) = nullptr); 1453 1454 OperandMatchResultTy 1455 parseNamedBit(StringRef Name, OperandVector &Operands, 1456 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1457 OperandMatchResultTy parseCPol(OperandVector &Operands); 1458 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1459 StringRef &Value, 1460 SMLoc &StringLoc); 1461 1462 bool isModifier(); 1463 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1464 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1465 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1466 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1467 bool parseSP3NegModifier(); 1468 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1469 OperandMatchResultTy parseReg(OperandVector &Operands); 1470 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1471 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1472 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1473 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1474 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1475 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1476 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1477 OperandMatchResultTy parseUfmt(int64_t &Format); 1478 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1479 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1480 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1481 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1482 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1483 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1484 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1485 1486 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1487 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1488 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1489 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1490 1491 bool parseCnt(int64_t &IntVal); 1492 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1493 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1494 1495 private: 1496 struct OperandInfoTy { 1497 SMLoc Loc; 1498 int64_t Id; 1499 bool IsSymbolic = false; 1500 bool IsDefined = false; 1501 1502 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1503 }; 1504 1505 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1506 bool validateSendMsg(const OperandInfoTy &Msg, 1507 const OperandInfoTy &Op, 1508 const OperandInfoTy &Stream); 1509 1510 bool parseHwregBody(OperandInfoTy &HwReg, 1511 OperandInfoTy &Offset, 1512 OperandInfoTy &Width); 1513 bool validateHwreg(const OperandInfoTy &HwReg, 1514 const OperandInfoTy &Offset, 1515 const OperandInfoTy &Width); 1516 1517 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1518 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1519 1520 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1521 const OperandVector &Operands) const; 1522 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1523 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1524 SMLoc getLitLoc(const OperandVector &Operands) const; 1525 SMLoc getConstLoc(const OperandVector &Operands) const; 1526 1527 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1528 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1529 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1530 bool validateSOPLiteral(const MCInst &Inst) const; 1531 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1532 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateIntClampSupported(const MCInst &Inst); 1534 bool validateMIMGAtomicDMask(const MCInst &Inst); 1535 bool validateMIMGGatherDMask(const MCInst &Inst); 1536 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateMIMGDataSize(const MCInst &Inst); 1538 bool validateMIMGAddrSize(const 
MCInst &Inst); 1539 bool validateMIMGD16(const MCInst &Inst); 1540 bool validateMIMGDim(const MCInst &Inst); 1541 bool validateMIMGMSAA(const MCInst &Inst); 1542 bool validateOpSel(const MCInst &Inst); 1543 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1544 bool validateVccOperand(unsigned Reg) const; 1545 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1546 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1547 bool validateAGPRLdSt(const MCInst &Inst) const; 1548 bool validateVGPRAlign(const MCInst &Inst) const; 1549 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateDivScale(const MCInst &Inst); 1551 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1552 const SMLoc &IDLoc); 1553 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1554 unsigned getConstantBusLimit(unsigned Opcode) const; 1555 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1556 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1557 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1558 1559 bool isSupportedMnemo(StringRef Mnemo, 1560 const FeatureBitset &FBS); 1561 bool isSupportedMnemo(StringRef Mnemo, 1562 const FeatureBitset &FBS, 1563 ArrayRef<unsigned> Variants); 1564 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1565 1566 bool isId(const StringRef Id) const; 1567 bool isId(const AsmToken &Token, const StringRef Id) const; 1568 bool isToken(const AsmToken::TokenKind Kind) const; 1569 bool trySkipId(const StringRef Id); 1570 bool trySkipId(const StringRef Pref, const StringRef Id); 1571 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1572 bool trySkipToken(const AsmToken::TokenKind Kind); 1573 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1574 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1575 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1576 1577 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1578 AsmToken::TokenKind getTokenKind() const; 1579 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1580 bool parseExpr(OperandVector &Operands); 1581 StringRef getTokenStr() const; 1582 AsmToken peekToken(); 1583 AsmToken getToken() const; 1584 SMLoc getLoc() const; 1585 void lex(); 1586 1587 public: 1588 void onBeginOfFile() override; 1589 1590 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1591 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1592 1593 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1594 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1595 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1596 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1597 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1598 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1599 1600 bool parseSwizzleOperand(int64_t &Op, 1601 const unsigned MinVal, 1602 const unsigned MaxVal, 1603 const StringRef ErrMsg, 1604 SMLoc &Loc); 1605 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1606 const unsigned MinVal, 1607 const unsigned MaxVal, 1608 const StringRef ErrMsg); 1609 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1610 bool parseSwizzleOffset(int64_t &Imm); 1611 bool parseSwizzleMacro(int64_t &Imm); 1612 bool parseSwizzleQuadPerm(int64_t &Imm); 1613 bool parseSwizzleBitmaskPerm(int64_t 
&Imm); 1614 bool parseSwizzleBroadcast(int64_t &Imm); 1615 bool parseSwizzleSwap(int64_t &Imm); 1616 bool parseSwizzleReverse(int64_t &Imm); 1617 1618 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1619 int64_t parseGPRIdxMacro(); 1620 1621 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1622 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1623 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1624 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1625 1626 AMDGPUOperand::Ptr defaultCPol() const; 1627 1628 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1629 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1630 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1631 AMDGPUOperand::Ptr defaultFlatOffset() const; 1632 1633 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1634 1635 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1636 OptionalImmIndexMap &OptionalIdx); 1637 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1638 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1639 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1640 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1641 OptionalImmIndexMap &OptionalIdx); 1642 1643 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1644 1645 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1646 bool IsAtomic = false); 1647 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1648 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1649 1650 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1651 1652 bool parseDimId(unsigned &Encoding); 1653 OperandMatchResultTy parseDim(OperandVector &Operands); 1654 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1655 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1656 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1657 int64_t parseDPPCtrlSel(StringRef Ctrl); 1658 int64_t parseDPPCtrlPerm(); 1659 AMDGPUOperand::Ptr defaultRowMask() const; 1660 AMDGPUOperand::Ptr defaultBankMask() const; 1661 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1662 AMDGPUOperand::Ptr defaultFI() const; 1663 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1664 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1665 1666 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1667 AMDGPUOperand::ImmTy Type); 1668 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1669 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1670 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1671 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1672 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1673 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1675 uint64_t BasicInstType, 1676 bool SkipDstVcc = false, 1677 bool SkipSrcVcc = false); 1678 1679 AMDGPUOperand::Ptr defaultBLGP() const; 1680 AMDGPUOperand::Ptr defaultCBSZ() const; 1681 AMDGPUOperand::Ptr defaultABID() const; 1682 1683 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1684 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1685 }; 1686 1687 struct OptionalOperand { 1688 
const char *Name; 1689 AMDGPUOperand::ImmTy Type; 1690 bool IsBit; 1691 bool (*ConvertResult)(int64_t&); 1692 }; 1693 1694 } // end anonymous namespace 1695 1696 // May be called with integer type with equivalent bitwidth. 1697 static const fltSemantics *getFltSemantics(unsigned Size) { 1698 switch (Size) { 1699 case 4: 1700 return &APFloat::IEEEsingle(); 1701 case 8: 1702 return &APFloat::IEEEdouble(); 1703 case 2: 1704 return &APFloat::IEEEhalf(); 1705 default: 1706 llvm_unreachable("unsupported fp type"); 1707 } 1708 } 1709 1710 static const fltSemantics *getFltSemantics(MVT VT) { 1711 return getFltSemantics(VT.getSizeInBits() / 8); 1712 } 1713 1714 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1715 switch (OperandType) { 1716 case AMDGPU::OPERAND_REG_IMM_INT32: 1717 case AMDGPU::OPERAND_REG_IMM_FP32: 1718 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1719 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1720 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1722 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1723 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1724 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1725 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1726 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1727 case AMDGPU::OPERAND_KIMM32: 1728 return &APFloat::IEEEsingle(); 1729 case AMDGPU::OPERAND_REG_IMM_INT64: 1730 case AMDGPU::OPERAND_REG_IMM_FP64: 1731 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1732 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1733 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1734 return &APFloat::IEEEdouble(); 1735 case AMDGPU::OPERAND_REG_IMM_INT16: 1736 case AMDGPU::OPERAND_REG_IMM_FP16: 1737 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1738 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1739 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1740 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1741 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1742 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1743 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1744 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1745 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1746 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1747 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1748 case AMDGPU::OPERAND_KIMM16: 1749 return &APFloat::IEEEhalf(); 1750 default: 1751 llvm_unreachable("unsupported fp type"); 1752 } 1753 } 1754 1755 //===----------------------------------------------------------------------===// 1756 // Operand 1757 //===----------------------------------------------------------------------===// 1758 1759 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1760 bool Lost; 1761 1762 // Convert literal to single precision 1763 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1764 APFloat::rmNearestTiesToEven, 1765 &Lost); 1766 // We allow precision lost but not overflow or underflow 1767 if (Status != APFloat::opOK && 1768 Lost && 1769 ((Status & APFloat::opOverflow) != 0 || 1770 (Status & APFloat::opUnderflow) != 0)) { 1771 return false; 1772 } 1773 1774 return true; 1775 } 1776 1777 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1778 return isUIntN(Size, Val) || isIntN(Size, Val); 1779 } 1780 1781 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1782 if (VT.getScalarType() == MVT::i16) { 1783 // FP immediate values are broken. 1784 return isInlinableIntLiteral(Val); 1785 } 1786 1787 // f16/v2f16 operands work correctly for all values. 
1788 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1789 } 1790 1791 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1792 1793 // This is a hack to enable named inline values like 1794 // shared_base with both 32-bit and 64-bit operands. 1795 // Note that these values are defined as 1796 // 32-bit operands only. 1797 if (isInlineValue()) { 1798 return true; 1799 } 1800 1801 if (!isImmTy(ImmTyNone)) { 1802 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1803 return false; 1804 } 1805 // TODO: We should avoid using host float here. It would be better to 1806 // check the float bit values which is what a few other places do. 1807 // We've had bot failures before due to weird NaN support on mips hosts. 1808 1809 APInt Literal(64, Imm.Val); 1810 1811 if (Imm.IsFPImm) { // We got fp literal token 1812 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1813 return AMDGPU::isInlinableLiteral64(Imm.Val, 1814 AsmParser->hasInv2PiInlineImm()); 1815 } 1816 1817 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1818 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1819 return false; 1820 1821 if (type.getScalarSizeInBits() == 16) { 1822 return isInlineableLiteralOp16( 1823 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1824 type, AsmParser->hasInv2PiInlineImm()); 1825 } 1826 1827 // Check if single precision literal is inlinable 1828 return AMDGPU::isInlinableLiteral32( 1829 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1830 AsmParser->hasInv2PiInlineImm()); 1831 } 1832 1833 // We got int literal token. 1834 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1835 return AMDGPU::isInlinableLiteral64(Imm.Val, 1836 AsmParser->hasInv2PiInlineImm()); 1837 } 1838 1839 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1840 return false; 1841 } 1842 1843 if (type.getScalarSizeInBits() == 16) { 1844 return isInlineableLiteralOp16( 1845 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1846 type, AsmParser->hasInv2PiInlineImm()); 1847 } 1848 1849 return AMDGPU::isInlinableLiteral32( 1850 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1851 AsmParser->hasInv2PiInlineImm()); 1852 } 1853 1854 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1855 // Check that this immediate can be added as literal 1856 if (!isImmTy(ImmTyNone)) { 1857 return false; 1858 } 1859 1860 if (!Imm.IsFPImm) { 1861 // We got int literal token. 1862 1863 if (type == MVT::f64 && hasFPModifiers()) { 1864 // Cannot apply fp modifiers to int literals preserving the same semantics 1865 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1866 // disable these cases. 1867 return false; 1868 } 1869 1870 unsigned Size = type.getSizeInBits(); 1871 if (Size == 64) 1872 Size = 32; 1873 1874 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1875 // types. 1876 return isSafeTruncation(Imm.Val, Size); 1877 } 1878 1879 // We got fp literal token 1880 if (type == MVT::f64) { // Expected 64-bit fp operand 1881 // We would set low 64-bits of literal to zeroes but we accept this literals 1882 return true; 1883 } 1884 1885 if (type == MVT::i64) { // Expected 64-bit int operand 1886 // We don't allow fp literals in 64-bit integer instructions. It is 1887 // unclear how we should encode them. 
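// (An fp literal such as 1.5 supplied for a 64-bit integer operand is
// therefore rejected up front instead of being silently reinterpreted.)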
1888 return false; 1889 } 1890 1891 // We allow fp literals with f16x2 operands assuming that the specified 1892 // literal goes into the lower half and the upper half is zero. We also 1893 // require that the literal may be losslesly converted to f16. 1894 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1895 (type == MVT::v2i16)? MVT::i16 : 1896 (type == MVT::v2f32)? MVT::f32 : type; 1897 1898 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1899 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1900 } 1901 1902 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1903 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1904 } 1905 1906 bool AMDGPUOperand::isVRegWithInputMods() const { 1907 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1908 // GFX90A allows DPP on 64-bit operands. 1909 (isRegClass(AMDGPU::VReg_64RegClassID) && 1910 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1911 } 1912 1913 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1914 if (AsmParser->isVI()) 1915 return isVReg32(); 1916 else if (AsmParser->isGFX9Plus()) 1917 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1918 else 1919 return false; 1920 } 1921 1922 bool AMDGPUOperand::isSDWAFP16Operand() const { 1923 return isSDWAOperand(MVT::f16); 1924 } 1925 1926 bool AMDGPUOperand::isSDWAFP32Operand() const { 1927 return isSDWAOperand(MVT::f32); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAInt16Operand() const { 1931 return isSDWAOperand(MVT::i16); 1932 } 1933 1934 bool AMDGPUOperand::isSDWAInt32Operand() const { 1935 return isSDWAOperand(MVT::i32); 1936 } 1937 1938 bool AMDGPUOperand::isBoolReg() const { 1939 auto FB = AsmParser->getFeatureBits(); 1940 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1941 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1942 } 1943 1944 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1945 { 1946 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1947 assert(Size == 2 || Size == 4 || Size == 8); 1948 1949 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1950 1951 if (Imm.Mods.Abs) { 1952 Val &= ~FpSignMask; 1953 } 1954 if (Imm.Mods.Neg) { 1955 Val ^= FpSignMask; 1956 } 1957 1958 return Val; 1959 } 1960 1961 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1962 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1963 Inst.getNumOperands())) { 1964 addLiteralImmOperand(Inst, Imm.Val, 1965 ApplyModifiers & 1966 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1967 } else { 1968 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1969 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1970 setImmKindNone(); 1971 } 1972 } 1973 1974 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1975 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1976 auto OpNum = Inst.getNumOperands(); 1977 // Check that this operand accepts literals 1978 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1979 1980 if (ApplyModifiers) { 1981 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1982 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1983 Val = applyInputFPModifiers(Val, Size); 1984 } 1985 1986 APInt Literal(64, Val); 1987 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1988 1989 if (Imm.IsFPImm) { // We got fp literal token 1990 switch (OpTy) { 1991 case AMDGPU::OPERAND_REG_IMM_INT64: 1992 case AMDGPU::OPERAND_REG_IMM_FP64: 1993 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1994 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1995 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1996 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1997 AsmParser->hasInv2PiInlineImm())) { 1998 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1999 setImmKindConst(); 2000 return; 2001 } 2002 2003 // Non-inlineable 2004 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2005 // For fp operands we check if low 32 bits are zeros 2006 if (Literal.getLoBits(32) != 0) { 2007 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2008 "Can't encode literal as exact 64-bit floating-point operand. " 2009 "Low 32-bits will be set to zero"); 2010 } 2011 2012 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2013 setImmKindLiteral(); 2014 return; 2015 } 2016 2017 // We don't allow fp literals in 64-bit integer instructions. It is 2018 // unclear how we should encode them. This case should be checked earlier 2019 // in predicate methods (isLiteralImm()) 2020 llvm_unreachable("fp literal in 64-bit integer instruction."); 2021 2022 case AMDGPU::OPERAND_REG_IMM_INT32: 2023 case AMDGPU::OPERAND_REG_IMM_FP32: 2024 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2025 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2026 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2027 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2028 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2029 case AMDGPU::OPERAND_REG_IMM_INT16: 2030 case AMDGPU::OPERAND_REG_IMM_FP16: 2031 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2032 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2033 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2034 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2035 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2036 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2037 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2038 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2039 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2040 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2041 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2042 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2043 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2044 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2045 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2046 case AMDGPU::OPERAND_KIMM32: 2047 case AMDGPU::OPERAND_KIMM16: { 2048 bool lost; 2049 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2050 // Convert literal to single precision 2051 FPLiteral.convert(*getOpFltSemantics(OpTy), 2052 APFloat::rmNearestTiesToEven, &lost); 2053 // We allow precision lost but not overflow or underflow. This should be 2054 // checked earlier in isLiteralImm() 2055 2056 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2057 Inst.addOperand(MCOperand::createImm(ImmVal)); 2058 setImmKindLiteral(); 2059 return; 2060 } 2061 default: 2062 llvm_unreachable("invalid operand size"); 2063 } 2064 2065 return; 2066 } 2067 2068 // We got int literal token. 2069 // Only sign extend inline immediates. 
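// In outline, the cases below generally try the value as an inline constant
// first and otherwise truncate it to the operand width and emit it as a
// literal.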
2070 switch (OpTy) { 2071 case AMDGPU::OPERAND_REG_IMM_INT32: 2072 case AMDGPU::OPERAND_REG_IMM_FP32: 2073 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2074 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2075 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2076 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2077 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2078 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2079 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2080 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2081 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2082 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2083 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2084 if (isSafeTruncation(Val, 32) && 2085 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2086 AsmParser->hasInv2PiInlineImm())) { 2087 Inst.addOperand(MCOperand::createImm(Val)); 2088 setImmKindConst(); 2089 return; 2090 } 2091 2092 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2093 setImmKindLiteral(); 2094 return; 2095 2096 case AMDGPU::OPERAND_REG_IMM_INT64: 2097 case AMDGPU::OPERAND_REG_IMM_FP64: 2098 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2099 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2101 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2102 Inst.addOperand(MCOperand::createImm(Val)); 2103 setImmKindConst(); 2104 return; 2105 } 2106 2107 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2108 setImmKindLiteral(); 2109 return; 2110 2111 case AMDGPU::OPERAND_REG_IMM_INT16: 2112 case AMDGPU::OPERAND_REG_IMM_FP16: 2113 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2114 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2118 if (isSafeTruncation(Val, 16) && 2119 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2120 AsmParser->hasInv2PiInlineImm())) { 2121 Inst.addOperand(MCOperand::createImm(Val)); 2122 setImmKindConst(); 2123 return; 2124 } 2125 2126 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2127 setImmKindLiteral(); 2128 return; 2129 2130 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2131 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2132 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2133 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2134 assert(isSafeTruncation(Val, 16)); 2135 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2136 AsmParser->hasInv2PiInlineImm())); 2137 2138 Inst.addOperand(MCOperand::createImm(Val)); 2139 return; 2140 } 2141 case AMDGPU::OPERAND_KIMM32: 2142 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2143 setImmKindNone(); 2144 return; 2145 case AMDGPU::OPERAND_KIMM16: 2146 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2147 setImmKindNone(); 2148 return; 2149 default: 2150 llvm_unreachable("invalid operand size"); 2151 } 2152 } 2153 2154 template <unsigned Bitwidth> 2155 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2156 APInt Literal(64, Imm.Val); 2157 setImmKindNone(); 2158 2159 if (!Imm.IsFPImm) { 2160 // We got int literal token. 
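// Integer case: pass the low Bitwidth bits through unchanged.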
2161 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2162 return; 2163 } 2164 2165 bool Lost; 2166 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2167 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2168 APFloat::rmNearestTiesToEven, &Lost); 2169 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2170 } 2171 2172 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2173 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2174 } 2175 2176 static bool isInlineValue(unsigned Reg) { 2177 switch (Reg) { 2178 case AMDGPU::SRC_SHARED_BASE: 2179 case AMDGPU::SRC_SHARED_LIMIT: 2180 case AMDGPU::SRC_PRIVATE_BASE: 2181 case AMDGPU::SRC_PRIVATE_LIMIT: 2182 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2183 return true; 2184 case AMDGPU::SRC_VCCZ: 2185 case AMDGPU::SRC_EXECZ: 2186 case AMDGPU::SRC_SCC: 2187 return true; 2188 case AMDGPU::SGPR_NULL: 2189 return true; 2190 default: 2191 return false; 2192 } 2193 } 2194 2195 bool AMDGPUOperand::isInlineValue() const { 2196 return isRegKind() && ::isInlineValue(getReg()); 2197 } 2198 2199 //===----------------------------------------------------------------------===// 2200 // AsmParser 2201 //===----------------------------------------------------------------------===// 2202 2203 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2204 if (Is == IS_VGPR) { 2205 switch (RegWidth) { 2206 default: return -1; 2207 case 1: return AMDGPU::VGPR_32RegClassID; 2208 case 2: return AMDGPU::VReg_64RegClassID; 2209 case 3: return AMDGPU::VReg_96RegClassID; 2210 case 4: return AMDGPU::VReg_128RegClassID; 2211 case 5: return AMDGPU::VReg_160RegClassID; 2212 case 6: return AMDGPU::VReg_192RegClassID; 2213 case 7: return AMDGPU::VReg_224RegClassID; 2214 case 8: return AMDGPU::VReg_256RegClassID; 2215 case 16: return AMDGPU::VReg_512RegClassID; 2216 case 32: return AMDGPU::VReg_1024RegClassID; 2217 } 2218 } else if (Is == IS_TTMP) { 2219 switch (RegWidth) { 2220 default: return -1; 2221 case 1: return AMDGPU::TTMP_32RegClassID; 2222 case 2: return AMDGPU::TTMP_64RegClassID; 2223 case 4: return AMDGPU::TTMP_128RegClassID; 2224 case 8: return AMDGPU::TTMP_256RegClassID; 2225 case 16: return AMDGPU::TTMP_512RegClassID; 2226 } 2227 } else if (Is == IS_SGPR) { 2228 switch (RegWidth) { 2229 default: return -1; 2230 case 1: return AMDGPU::SGPR_32RegClassID; 2231 case 2: return AMDGPU::SGPR_64RegClassID; 2232 case 3: return AMDGPU::SGPR_96RegClassID; 2233 case 4: return AMDGPU::SGPR_128RegClassID; 2234 case 5: return AMDGPU::SGPR_160RegClassID; 2235 case 6: return AMDGPU::SGPR_192RegClassID; 2236 case 7: return AMDGPU::SGPR_224RegClassID; 2237 case 8: return AMDGPU::SGPR_256RegClassID; 2238 case 16: return AMDGPU::SGPR_512RegClassID; 2239 } 2240 } else if (Is == IS_AGPR) { 2241 switch (RegWidth) { 2242 default: return -1; 2243 case 1: return AMDGPU::AGPR_32RegClassID; 2244 case 2: return AMDGPU::AReg_64RegClassID; 2245 case 3: return AMDGPU::AReg_96RegClassID; 2246 case 4: return AMDGPU::AReg_128RegClassID; 2247 case 5: return AMDGPU::AReg_160RegClassID; 2248 case 6: return AMDGPU::AReg_192RegClassID; 2249 case 7: return AMDGPU::AReg_224RegClassID; 2250 case 8: return AMDGPU::AReg_256RegClassID; 2251 case 16: return AMDGPU::AReg_512RegClassID; 2252 case 32: return AMDGPU::AReg_1024RegClassID; 2253 } 2254 } 2255 return -1; 2256 } 2257 2258 static unsigned getSpecialRegForName(StringRef RegName) { 2259 return StringSwitch<unsigned>(RegName) 2260 .Case("exec", 
AMDGPU::EXEC) 2261 .Case("vcc", AMDGPU::VCC) 2262 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2263 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2264 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2265 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2266 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2267 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2268 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2269 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2270 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2271 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2272 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2273 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2274 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2275 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2276 .Case("m0", AMDGPU::M0) 2277 .Case("vccz", AMDGPU::SRC_VCCZ) 2278 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2279 .Case("execz", AMDGPU::SRC_EXECZ) 2280 .Case("src_execz", AMDGPU::SRC_EXECZ) 2281 .Case("scc", AMDGPU::SRC_SCC) 2282 .Case("src_scc", AMDGPU::SRC_SCC) 2283 .Case("tba", AMDGPU::TBA) 2284 .Case("tma", AMDGPU::TMA) 2285 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2286 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2287 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2288 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2289 .Case("vcc_lo", AMDGPU::VCC_LO) 2290 .Case("vcc_hi", AMDGPU::VCC_HI) 2291 .Case("exec_lo", AMDGPU::EXEC_LO) 2292 .Case("exec_hi", AMDGPU::EXEC_HI) 2293 .Case("tma_lo", AMDGPU::TMA_LO) 2294 .Case("tma_hi", AMDGPU::TMA_HI) 2295 .Case("tba_lo", AMDGPU::TBA_LO) 2296 .Case("tba_hi", AMDGPU::TBA_HI) 2297 .Case("pc", AMDGPU::PC_REG) 2298 .Case("null", AMDGPU::SGPR_NULL) 2299 .Default(AMDGPU::NoRegister); 2300 } 2301 2302 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2303 SMLoc &EndLoc, bool RestoreOnFailure) { 2304 auto R = parseRegister(); 2305 if (!R) return true; 2306 assert(R->isReg()); 2307 RegNo = R->getReg(); 2308 StartLoc = R->getStartLoc(); 2309 EndLoc = R->getEndLoc(); 2310 return false; 2311 } 2312 2313 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2314 SMLoc &EndLoc) { 2315 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2316 } 2317 2318 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2319 SMLoc &StartLoc, 2320 SMLoc &EndLoc) { 2321 bool Result = 2322 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2323 bool PendingErrors = getParser().hasPendingError(); 2324 getParser().clearPendingErrors(); 2325 if (PendingErrors) 2326 return MatchOperand_ParseFail; 2327 if (Result) 2328 return MatchOperand_NoMatch; 2329 return MatchOperand_Success; 2330 } 2331 2332 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2333 RegisterKind RegKind, unsigned Reg1, 2334 SMLoc Loc) { 2335 switch (RegKind) { 2336 case IS_SPECIAL: 2337 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2338 Reg = AMDGPU::EXEC; 2339 RegWidth = 2; 2340 return true; 2341 } 2342 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2343 Reg = AMDGPU::FLAT_SCR; 2344 RegWidth = 2; 2345 return true; 2346 } 2347 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2348 Reg = AMDGPU::XNACK_MASK; 2349 RegWidth = 2; 2350 return true; 2351 } 2352 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2353 Reg = AMDGPU::VCC; 2354 RegWidth = 2; 2355 return true; 2356 } 2357 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2358 Reg = 
AMDGPU::TBA; 2359 RegWidth = 2; 2360 return true; 2361 } 2362 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2363 Reg = AMDGPU::TMA; 2364 RegWidth = 2; 2365 return true; 2366 } 2367 Error(Loc, "register does not fit in the list"); 2368 return false; 2369 case IS_VGPR: 2370 case IS_SGPR: 2371 case IS_AGPR: 2372 case IS_TTMP: 2373 if (Reg1 != Reg + RegWidth) { 2374 Error(Loc, "registers in a list must have consecutive indices"); 2375 return false; 2376 } 2377 RegWidth++; 2378 return true; 2379 default: 2380 llvm_unreachable("unexpected register kind"); 2381 } 2382 } 2383 2384 struct RegInfo { 2385 StringLiteral Name; 2386 RegisterKind Kind; 2387 }; 2388 2389 static constexpr RegInfo RegularRegisters[] = { 2390 {{"v"}, IS_VGPR}, 2391 {{"s"}, IS_SGPR}, 2392 {{"ttmp"}, IS_TTMP}, 2393 {{"acc"}, IS_AGPR}, 2394 {{"a"}, IS_AGPR}, 2395 }; 2396 2397 static bool isRegularReg(RegisterKind Kind) { 2398 return Kind == IS_VGPR || 2399 Kind == IS_SGPR || 2400 Kind == IS_TTMP || 2401 Kind == IS_AGPR; 2402 } 2403 2404 static const RegInfo* getRegularRegInfo(StringRef Str) { 2405 for (const RegInfo &Reg : RegularRegisters) 2406 if (Str.startswith(Reg.Name)) 2407 return &Reg; 2408 return nullptr; 2409 } 2410 2411 static bool getRegNum(StringRef Str, unsigned& Num) { 2412 return !Str.getAsInteger(10, Num); 2413 } 2414 2415 bool 2416 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2417 const AsmToken &NextToken) const { 2418 2419 // A list of consecutive registers: [s0,s1,s2,s3] 2420 if (Token.is(AsmToken::LBrac)) 2421 return true; 2422 2423 if (!Token.is(AsmToken::Identifier)) 2424 return false; 2425 2426 // A single register like s0 or a range of registers like s[0:1] 2427 2428 StringRef Str = Token.getString(); 2429 const RegInfo *Reg = getRegularRegInfo(Str); 2430 if (Reg) { 2431 StringRef RegName = Reg->Name; 2432 StringRef RegSuffix = Str.substr(RegName.size()); 2433 if (!RegSuffix.empty()) { 2434 unsigned Num; 2435 // A single register with an index: rXX 2436 if (getRegNum(RegSuffix, Num)) 2437 return true; 2438 } else { 2439 // A range of registers: r[XX:YY]. 2440 if (NextToken.is(AsmToken::LBrac)) 2441 return true; 2442 } 2443 } 2444 2445 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2446 } 2447 2448 bool 2449 AMDGPUAsmParser::isRegister() 2450 { 2451 return isRegister(getToken(), peekToken()); 2452 } 2453 2454 unsigned 2455 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2456 unsigned RegNum, 2457 unsigned RegWidth, 2458 SMLoc Loc) { 2459 2460 assert(isRegularReg(RegKind)); 2461 2462 unsigned AlignSize = 1; 2463 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2464 // SGPR and TTMP registers must be aligned. 2465 // Max required alignment is 4 dwords. 
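// For example, s[2:3] is a valid 64-bit pair while s[1:2] is not, and
// s[4:7] is a valid 128-bit tuple while s[2:5] is not.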
2466 AlignSize = std::min(RegWidth, 4u); 2467 } 2468 2469 if (RegNum % AlignSize != 0) { 2470 Error(Loc, "invalid register alignment"); 2471 return AMDGPU::NoRegister; 2472 } 2473 2474 unsigned RegIdx = RegNum / AlignSize; 2475 int RCID = getRegClass(RegKind, RegWidth); 2476 if (RCID == -1) { 2477 Error(Loc, "invalid or unsupported register size"); 2478 return AMDGPU::NoRegister; 2479 } 2480 2481 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2482 const MCRegisterClass RC = TRI->getRegClass(RCID); 2483 if (RegIdx >= RC.getNumRegs()) { 2484 Error(Loc, "register index is out of range"); 2485 return AMDGPU::NoRegister; 2486 } 2487 2488 return RC.getRegister(RegIdx); 2489 } 2490 2491 bool 2492 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2493 int64_t RegLo, RegHi; 2494 if (!skipToken(AsmToken::LBrac, "missing register index")) 2495 return false; 2496 2497 SMLoc FirstIdxLoc = getLoc(); 2498 SMLoc SecondIdxLoc; 2499 2500 if (!parseExpr(RegLo)) 2501 return false; 2502 2503 if (trySkipToken(AsmToken::Colon)) { 2504 SecondIdxLoc = getLoc(); 2505 if (!parseExpr(RegHi)) 2506 return false; 2507 } else { 2508 RegHi = RegLo; 2509 } 2510 2511 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2512 return false; 2513 2514 if (!isUInt<32>(RegLo)) { 2515 Error(FirstIdxLoc, "invalid register index"); 2516 return false; 2517 } 2518 2519 if (!isUInt<32>(RegHi)) { 2520 Error(SecondIdxLoc, "invalid register index"); 2521 return false; 2522 } 2523 2524 if (RegLo > RegHi) { 2525 Error(FirstIdxLoc, "first register index should not exceed second index"); 2526 return false; 2527 } 2528 2529 Num = static_cast<unsigned>(RegLo); 2530 Width = (RegHi - RegLo) + 1; 2531 return true; 2532 } 2533 2534 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2535 unsigned &RegNum, unsigned &RegWidth, 2536 SmallVectorImpl<AsmToken> &Tokens) { 2537 assert(isToken(AsmToken::Identifier)); 2538 unsigned Reg = getSpecialRegForName(getTokenStr()); 2539 if (Reg) { 2540 RegNum = 0; 2541 RegWidth = 1; 2542 RegKind = IS_SPECIAL; 2543 Tokens.push_back(getToken()); 2544 lex(); // skip register name 2545 } 2546 return Reg; 2547 } 2548 2549 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2550 unsigned &RegNum, unsigned &RegWidth, 2551 SmallVectorImpl<AsmToken> &Tokens) { 2552 assert(isToken(AsmToken::Identifier)); 2553 StringRef RegName = getTokenStr(); 2554 auto Loc = getLoc(); 2555 2556 const RegInfo *RI = getRegularRegInfo(RegName); 2557 if (!RI) { 2558 Error(Loc, "invalid register name"); 2559 return AMDGPU::NoRegister; 2560 } 2561 2562 Tokens.push_back(getToken()); 2563 lex(); // skip register name 2564 2565 RegKind = RI->Kind; 2566 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2567 if (!RegSuffix.empty()) { 2568 // Single 32-bit register: vXX. 2569 if (!getRegNum(RegSuffix, RegNum)) { 2570 Error(Loc, "invalid register index"); 2571 return AMDGPU::NoRegister; 2572 } 2573 RegWidth = 1; 2574 } else { 2575 // Range of registers: v[XX:YY]. ":YY" is optional. 
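// e.g. "v[8:11]" yields RegNum = 8 with RegWidth = 4, and "v[8]" is
// equivalent to plain "v8".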
2576 if (!ParseRegRange(RegNum, RegWidth)) 2577 return AMDGPU::NoRegister; 2578 } 2579 2580 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2581 } 2582 2583 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2584 unsigned &RegWidth, 2585 SmallVectorImpl<AsmToken> &Tokens) { 2586 unsigned Reg = AMDGPU::NoRegister; 2587 auto ListLoc = getLoc(); 2588 2589 if (!skipToken(AsmToken::LBrac, 2590 "expected a register or a list of registers")) { 2591 return AMDGPU::NoRegister; 2592 } 2593 2594 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2595 2596 auto Loc = getLoc(); 2597 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2598 return AMDGPU::NoRegister; 2599 if (RegWidth != 1) { 2600 Error(Loc, "expected a single 32-bit register"); 2601 return AMDGPU::NoRegister; 2602 } 2603 2604 for (; trySkipToken(AsmToken::Comma); ) { 2605 RegisterKind NextRegKind; 2606 unsigned NextReg, NextRegNum, NextRegWidth; 2607 Loc = getLoc(); 2608 2609 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2610 NextRegNum, NextRegWidth, 2611 Tokens)) { 2612 return AMDGPU::NoRegister; 2613 } 2614 if (NextRegWidth != 1) { 2615 Error(Loc, "expected a single 32-bit register"); 2616 return AMDGPU::NoRegister; 2617 } 2618 if (NextRegKind != RegKind) { 2619 Error(Loc, "registers in a list must be of the same kind"); 2620 return AMDGPU::NoRegister; 2621 } 2622 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2623 return AMDGPU::NoRegister; 2624 } 2625 2626 if (!skipToken(AsmToken::RBrac, 2627 "expected a comma or a closing square bracket")) { 2628 return AMDGPU::NoRegister; 2629 } 2630 2631 if (isRegularReg(RegKind)) 2632 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2633 2634 return Reg; 2635 } 2636 2637 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2638 unsigned &RegNum, unsigned &RegWidth, 2639 SmallVectorImpl<AsmToken> &Tokens) { 2640 auto Loc = getLoc(); 2641 Reg = AMDGPU::NoRegister; 2642 2643 if (isToken(AsmToken::Identifier)) { 2644 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2645 if (Reg == AMDGPU::NoRegister) 2646 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2647 } else { 2648 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2649 } 2650 2651 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2652 if (Reg == AMDGPU::NoRegister) { 2653 assert(Parser.hasPendingError()); 2654 return false; 2655 } 2656 2657 if (!subtargetHasRegister(*TRI, Reg)) { 2658 if (Reg == AMDGPU::SGPR_NULL) { 2659 Error(Loc, "'null' operand is not supported on this GPU"); 2660 } else { 2661 Error(Loc, "register not available on this GPU"); 2662 } 2663 return false; 2664 } 2665 2666 return true; 2667 } 2668 2669 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2670 unsigned &RegNum, unsigned &RegWidth, 2671 bool RestoreOnFailure /*=false*/) { 2672 Reg = AMDGPU::NoRegister; 2673 2674 SmallVector<AsmToken, 1> Tokens; 2675 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2676 if (RestoreOnFailure) { 2677 while (!Tokens.empty()) { 2678 getLexer().UnLex(Tokens.pop_back_val()); 2679 } 2680 } 2681 return true; 2682 } 2683 return false; 2684 } 2685 2686 Optional<StringRef> 2687 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2688 switch (RegKind) { 2689 case IS_VGPR: 2690 return StringRef(".amdgcn.next_free_vgpr"); 2691 case IS_SGPR: 2692 return StringRef(".amdgcn.next_free_sgpr"); 2693 default: 2694 return None; 2695 } 2696 } 2697 2698 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2699 auto SymbolName = getGprCountSymbolName(RegKind); 2700 assert(SymbolName && "initializing invalid register kind"); 2701 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2702 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2703 } 2704 2705 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2706 unsigned DwordRegIndex, 2707 unsigned RegWidth) { 2708 // Symbols are only defined for GCN targets 2709 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2710 return true; 2711 2712 auto SymbolName = getGprCountSymbolName(RegKind); 2713 if (!SymbolName) 2714 return true; 2715 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2716 2717 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2718 int64_t OldCount; 2719 2720 if (!Sym->isVariable()) 2721 return !Error(getLoc(), 2722 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2723 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2724 return !Error( 2725 getLoc(), 2726 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2727 2728 if (OldCount <= NewMax) 2729 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2730 2731 return true; 2732 } 2733 2734 std::unique_ptr<AMDGPUOperand> 2735 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2736 const auto &Tok = getToken(); 2737 SMLoc StartLoc = Tok.getLoc(); 2738 SMLoc EndLoc = Tok.getEndLoc(); 2739 RegisterKind RegKind; 2740 unsigned Reg, RegNum, RegWidth; 2741 2742 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2743 return nullptr; 2744 } 2745 if (isHsaAbiVersion3Or4(&getSTI())) { 2746 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2747 return nullptr; 2748 } else 2749 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2750 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2751 } 2752 2753 OperandMatchResultTy 2754 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2755 // TODO: add syntactic sugar for 1/(2*PI) 2756 2757 assert(!isRegister()); 2758 assert(!isModifier()); 2759 2760 const auto& Tok = getToken(); 2761 const auto& NextTok = peekToken(); 2762 bool IsReal = Tok.is(AsmToken::Real); 2763 SMLoc S = getLoc(); 2764 bool Negate = false; 2765 2766 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2767 lex(); 2768 IsReal = true; 2769 Negate = true; 2770 } 2771 2772 if (IsReal) { 2773 // Floating-point expressions are not supported. 2774 // Can only allow floating-point literals with an 2775 // optional sign. 2776 2777 StringRef Num = getTokenStr(); 2778 lex(); 2779 2780 APFloat RealVal(APFloat::IEEEdouble()); 2781 auto roundMode = APFloat::rmNearestTiesToEven; 2782 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2783 return MatchOperand_ParseFail; 2784 } 2785 if (Negate) 2786 RealVal.changeSign(); 2787 2788 Operands.push_back( 2789 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2790 AMDGPUOperand::ImmTyNone, true)); 2791 2792 return MatchOperand_Success; 2793 2794 } else { 2795 int64_t IntVal; 2796 const MCExpr *Expr; 2797 SMLoc S = getLoc(); 2798 2799 if (HasSP3AbsModifier) { 2800 // This is a workaround for handling expressions 2801 // as arguments of SP3 'abs' modifier, for example: 2802 // |1.0| 2803 // |-1| 2804 // |1+x| 2805 // This syntax is not compatible with syntax of standard 2806 // MC expressions (due to the trailing '|'). 
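// Use parsePrimaryExpr() rather than parseExpression() so that parsing
// stops before the closing '|' instead of treating it as a bitwise-or.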
2807 SMLoc EndLoc; 2808 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2809 return MatchOperand_ParseFail; 2810 } else { 2811 if (Parser.parseExpression(Expr)) 2812 return MatchOperand_ParseFail; 2813 } 2814 2815 if (Expr->evaluateAsAbsolute(IntVal)) { 2816 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2817 } else { 2818 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2819 } 2820 2821 return MatchOperand_Success; 2822 } 2823 2824 return MatchOperand_NoMatch; 2825 } 2826 2827 OperandMatchResultTy 2828 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2829 if (!isRegister()) 2830 return MatchOperand_NoMatch; 2831 2832 if (auto R = parseRegister()) { 2833 assert(R->isReg()); 2834 Operands.push_back(std::move(R)); 2835 return MatchOperand_Success; 2836 } 2837 return MatchOperand_ParseFail; 2838 } 2839 2840 OperandMatchResultTy 2841 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2842 auto res = parseReg(Operands); 2843 if (res != MatchOperand_NoMatch) { 2844 return res; 2845 } else if (isModifier()) { 2846 return MatchOperand_NoMatch; 2847 } else { 2848 return parseImm(Operands, HasSP3AbsMod); 2849 } 2850 } 2851 2852 bool 2853 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2854 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2855 const auto &str = Token.getString(); 2856 return str == "abs" || str == "neg" || str == "sext"; 2857 } 2858 return false; 2859 } 2860 2861 bool 2862 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2863 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2864 } 2865 2866 bool 2867 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2868 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2869 } 2870 2871 bool 2872 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2873 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2874 } 2875 2876 // Check if this is an operand modifier or an opcode modifier 2877 // which may look like an expression but it is not. We should 2878 // avoid parsing these modifiers as expressions. Currently 2879 // recognized sequences are: 2880 // |...| 2881 // abs(...) 2882 // neg(...) 2883 // sext(...) 2884 // -reg 2885 // -|...| 2886 // -abs(...) 2887 // name:... 2888 // Note that simple opcode modifiers like 'gds' may be parsed as 2889 // expressions; this is a special case. See getExpressionAsToken. 2890 // 2891 bool 2892 AMDGPUAsmParser::isModifier() { 2893 2894 AsmToken Tok = getToken(); 2895 AsmToken NextToken[2]; 2896 peekTokens(NextToken); 2897 2898 return isOperandModifier(Tok, NextToken[0]) || 2899 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2900 isOpcodeModifierWithVal(Tok, NextToken[0]); 2901 } 2902 2903 // Check if the current token is an SP3 'neg' modifier. 2904 // Currently this modifier is allowed in the following context: 2905 // 2906 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2907 // 2. Before an 'abs' modifier: -abs(...) 2908 // 3. Before an SP3 'abs' modifier: -|...| 2909 // 2910 // In all other cases "-" is handled as a part 2911 // of an expression that follows the sign. 
2912 // 2913 // Note: When "-" is followed by an integer literal, 2914 // this is interpreted as integer negation rather 2915 // than a floating-point NEG modifier applied to N. 2916 // Besides being counter-intuitive, such use of floating-point 2917 // NEG modifier would have resulted in different meanings 2918 // of integer literals used with VOP1/2/C and VOP3, 2919 // for example: 2920 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2921 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2922 // Negative fp literals with preceding "-" are 2923 // handled likewise for uniformity 2924 // 2925 bool 2926 AMDGPUAsmParser::parseSP3NegModifier() { 2927 2928 AsmToken NextToken[2]; 2929 peekTokens(NextToken); 2930 2931 if (isToken(AsmToken::Minus) && 2932 (isRegister(NextToken[0], NextToken[1]) || 2933 NextToken[0].is(AsmToken::Pipe) || 2934 isId(NextToken[0], "abs"))) { 2935 lex(); 2936 return true; 2937 } 2938 2939 return false; 2940 } 2941 2942 OperandMatchResultTy 2943 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2944 bool AllowImm) { 2945 bool Neg, SP3Neg; 2946 bool Abs, SP3Abs; 2947 SMLoc Loc; 2948 2949 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2950 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2951 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2952 return MatchOperand_ParseFail; 2953 } 2954 2955 SP3Neg = parseSP3NegModifier(); 2956 2957 Loc = getLoc(); 2958 Neg = trySkipId("neg"); 2959 if (Neg && SP3Neg) { 2960 Error(Loc, "expected register or immediate"); 2961 return MatchOperand_ParseFail; 2962 } 2963 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2964 return MatchOperand_ParseFail; 2965 2966 Abs = trySkipId("abs"); 2967 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2968 return MatchOperand_ParseFail; 2969 2970 Loc = getLoc(); 2971 SP3Abs = trySkipToken(AsmToken::Pipe); 2972 if (Abs && SP3Abs) { 2973 Error(Loc, "expected register or immediate"); 2974 return MatchOperand_ParseFail; 2975 } 2976 2977 OperandMatchResultTy Res; 2978 if (AllowImm) { 2979 Res = parseRegOrImm(Operands, SP3Abs); 2980 } else { 2981 Res = parseReg(Operands); 2982 } 2983 if (Res != MatchOperand_Success) { 2984 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2985 } 2986 2987 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2988 return MatchOperand_ParseFail; 2989 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2990 return MatchOperand_ParseFail; 2991 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2992 return MatchOperand_ParseFail; 2993 2994 AMDGPUOperand::Modifiers Mods; 2995 Mods.Abs = Abs || SP3Abs; 2996 Mods.Neg = Neg || SP3Neg; 2997 2998 if (Mods.hasFPModifiers()) { 2999 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3000 if (Op.isExpr()) { 3001 Error(Op.getStartLoc(), "expected an absolute expression"); 3002 return MatchOperand_ParseFail; 3003 } 3004 Op.setModifiers(Mods); 3005 } 3006 return MatchOperand_Success; 3007 } 3008 3009 OperandMatchResultTy 3010 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3011 bool AllowImm) { 3012 bool Sext = trySkipId("sext"); 3013 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3014 return MatchOperand_ParseFail; 3015 3016 OperandMatchResultTy Res; 3017 if (AllowImm) { 3018 Res = parseRegOrImm(Operands); 3019 } else { 3020 Res = parseReg(Operands); 3021 } 3022 if (Res != MatchOperand_Success) { 3023 return Sext? MatchOperand_ParseFail : Res; 3024 } 3025 3026 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3027 return MatchOperand_ParseFail; 3028 3029 AMDGPUOperand::Modifiers Mods; 3030 Mods.Sext = Sext; 3031 3032 if (Mods.hasIntModifiers()) { 3033 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3034 if (Op.isExpr()) { 3035 Error(Op.getStartLoc(), "expected an absolute expression"); 3036 return MatchOperand_ParseFail; 3037 } 3038 Op.setModifiers(Mods); 3039 } 3040 3041 return MatchOperand_Success; 3042 } 3043 3044 OperandMatchResultTy 3045 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3046 return parseRegOrImmWithFPInputMods(Operands, false); 3047 } 3048 3049 OperandMatchResultTy 3050 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3051 return parseRegOrImmWithIntInputMods(Operands, false); 3052 } 3053 3054 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3055 auto Loc = getLoc(); 3056 if (trySkipId("off")) { 3057 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3058 AMDGPUOperand::ImmTyOff, false)); 3059 return MatchOperand_Success; 3060 } 3061 3062 if (!isRegister()) 3063 return MatchOperand_NoMatch; 3064 3065 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3066 if (Reg) { 3067 Operands.push_back(std::move(Reg)); 3068 return MatchOperand_Success; 3069 } 3070 3071 return MatchOperand_ParseFail; 3072 3073 } 3074 3075 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3076 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3077 3078 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3079 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3080 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3081 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3082 return Match_InvalidOperand; 3083 3084 if ((TSFlags & SIInstrFlags::VOP3) && 3085 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3086 getForcedEncodingSize() != 64) 3087 return Match_PreferE32; 3088 3089 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3090 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3091 // v_mac_f32/16 allow only dst_sel == DWORD; 3092 auto OpNum = 3093 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3094 const auto &Op = Inst.getOperand(OpNum); 3095 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3096 return Match_InvalidOperand; 3097 } 3098 } 3099 3100 return Match_Success; 3101 } 3102 3103 static ArrayRef<unsigned> getAllVariants() { 3104 static const unsigned Variants[] = { 3105 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3106 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3107 }; 3108 3109 return makeArrayRef(Variants); 3110 } 3111 3112 // Which asm variants we should check 3113 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3114 if (getForcedEncodingSize() == 32) { 3115 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3116 return makeArrayRef(Variants); 3117 } 3118 3119 if (isForcedVOP3()) { 3120 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3121 return makeArrayRef(Variants); 3122 } 3123 3124 if (isForcedSDWA()) { 3125 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3126 AMDGPUAsmVariants::SDWA9}; 3127 return makeArrayRef(Variants); 3128 } 3129 3130 if (isForcedDPP()) { 3131 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3132 return makeArrayRef(Variants); 3133 } 3134 3135 return getAllVariants(); 3136 } 3137 3138 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3139 if (getForcedEncodingSize() == 32) 3140 return "e32"; 3141 3142 if (isForcedVOP3()) 3143 return "e64"; 3144 3145 if (isForcedSDWA()) 3146 return "sdwa"; 3147 3148 if (isForcedDPP()) 3149 return "dpp"; 3150 3151 return ""; 3152 } 3153 3154 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3155 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3156 const unsigned Num = Desc.getNumImplicitUses(); 3157 for (unsigned i = 0; i < Num; ++i) { 3158 unsigned Reg = Desc.ImplicitUses[i]; 3159 switch (Reg) { 3160 case AMDGPU::FLAT_SCR: 3161 case AMDGPU::VCC: 3162 case AMDGPU::VCC_LO: 3163 case AMDGPU::VCC_HI: 3164 case AMDGPU::M0: 3165 return Reg; 3166 default: 3167 break; 3168 } 3169 } 3170 return AMDGPU::NoRegister; 3171 } 3172 3173 // NB: This code is correct only when used to check constant 3174 // bus limitations because GFX7 supports no f16 inline constants. 3175 // Note that there are no cases when a GFX7 opcode violates 3176 // constant bus limitations due to the use of an f16 constant.
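// For illustration: with a 32-bit operand, values such as 64 or 0x3f800000
// (1.0f) are inline constants, while 65 would have to be encoded as a
// literal and so counts against the constant bus.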
3177 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3178 unsigned OpIdx) const { 3179 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3180 3181 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3182 return false; 3183 } 3184 3185 const MCOperand &MO = Inst.getOperand(OpIdx); 3186 3187 int64_t Val = MO.getImm(); 3188 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3189 3190 switch (OpSize) { // expected operand size 3191 case 8: 3192 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3193 case 4: 3194 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3195 case 2: { 3196 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3197 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3198 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3199 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3200 return AMDGPU::isInlinableIntLiteral(Val); 3201 3202 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3203 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3204 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3205 return AMDGPU::isInlinableIntLiteralV216(Val); 3206 3207 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3208 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3209 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3210 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3211 3212 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3213 } 3214 default: 3215 llvm_unreachable("invalid operand size"); 3216 } 3217 } 3218 3219 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3220 if (!isGFX10Plus()) 3221 return 1; 3222 3223 switch (Opcode) { 3224 // 64-bit shift instructions can use only one scalar value input 3225 case AMDGPU::V_LSHLREV_B64_e64: 3226 case AMDGPU::V_LSHLREV_B64_gfx10: 3227 case AMDGPU::V_LSHRREV_B64_e64: 3228 case AMDGPU::V_LSHRREV_B64_gfx10: 3229 case AMDGPU::V_ASHRREV_I64_e64: 3230 case AMDGPU::V_ASHRREV_I64_gfx10: 3231 case AMDGPU::V_LSHL_B64_e64: 3232 case AMDGPU::V_LSHR_B64_e64: 3233 case AMDGPU::V_ASHR_I64_e64: 3234 return 1; 3235 default: 3236 return 2; 3237 } 3238 } 3239 3240 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3241 const MCOperand &MO = Inst.getOperand(OpIdx); 3242 if (MO.isImm()) { 3243 return !isInlineConstant(Inst, OpIdx); 3244 } else if (MO.isReg()) { 3245 auto Reg = MO.getReg(); 3246 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3247 auto PReg = mc2PseudoReg(Reg); 3248 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3249 } else { 3250 return true; 3251 } 3252 } 3253 3254 bool 3255 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3256 const OperandVector &Operands) { 3257 const unsigned Opcode = Inst.getOpcode(); 3258 const MCInstrDesc &Desc = MII.get(Opcode); 3259 unsigned LastSGPR = AMDGPU::NoRegister; 3260 unsigned ConstantBusUseCount = 0; 3261 unsigned NumLiterals = 0; 3262 unsigned LiteralSize; 3263 3264 if (Desc.TSFlags & 3265 (SIInstrFlags::VOPC | 3266 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3267 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3268 SIInstrFlags::SDWA)) { 3269 // Check special imm operands (used by madmk, etc) 3270 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3271 ++NumLiterals; 3272 LiteralSize = 4; 3273 } 3274 3275 SmallDenseSet<unsigned> SGPRsUsed; 3276 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3277 if (SGPRUsed != AMDGPU::NoRegister) { 3278 SGPRsUsed.insert(SGPRUsed); 3279 ++ConstantBusUseCount; 3280 } 3281 3282 
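// Now scan the explicit source operands; each distinct SGPR read and any
// literal use found below adds to ConstantBusUseCount.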
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3283 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3284 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3285 3286 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3287 3288 for (int OpIdx : OpIndices) { 3289 if (OpIdx == -1) break; 3290 3291 const MCOperand &MO = Inst.getOperand(OpIdx); 3292 if (usesConstantBus(Inst, OpIdx)) { 3293 if (MO.isReg()) { 3294 LastSGPR = mc2PseudoReg(MO.getReg()); 3295 // Pairs of registers with a partial intersections like these 3296 // s0, s[0:1] 3297 // flat_scratch_lo, flat_scratch 3298 // flat_scratch_lo, flat_scratch_hi 3299 // are theoretically valid but they are disabled anyway. 3300 // Note that this code mimics SIInstrInfo::verifyInstruction 3301 if (!SGPRsUsed.count(LastSGPR)) { 3302 SGPRsUsed.insert(LastSGPR); 3303 ++ConstantBusUseCount; 3304 } 3305 } else { // Expression or a literal 3306 3307 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3308 continue; // special operand like VINTERP attr_chan 3309 3310 // An instruction may use only one literal. 3311 // This has been validated on the previous step. 3312 // See validateVOPLiteral. 3313 // This literal may be used as more than one operand. 3314 // If all these operands are of the same size, 3315 // this literal counts as one scalar value. 3316 // Otherwise it counts as 2 scalar values. 3317 // See "GFX10 Shader Programming", section 3.6.2.3. 3318 3319 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3320 if (Size < 4) Size = 4; 3321 3322 if (NumLiterals == 0) { 3323 NumLiterals = 1; 3324 LiteralSize = Size; 3325 } else if (LiteralSize != Size) { 3326 NumLiterals = 2; 3327 } 3328 } 3329 } 3330 } 3331 } 3332 ConstantBusUseCount += NumLiterals; 3333 3334 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3335 return true; 3336 3337 SMLoc LitLoc = getLitLoc(Operands); 3338 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3339 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3340 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3341 return false; 3342 } 3343 3344 bool 3345 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3346 const OperandVector &Operands) { 3347 const unsigned Opcode = Inst.getOpcode(); 3348 const MCInstrDesc &Desc = MII.get(Opcode); 3349 3350 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3351 if (DstIdx == -1 || 3352 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3353 return true; 3354 } 3355 3356 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3357 3358 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3359 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3360 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3361 3362 assert(DstIdx != -1); 3363 const MCOperand &Dst = Inst.getOperand(DstIdx); 3364 assert(Dst.isReg()); 3365 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3366 3367 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3368 3369 for (int SrcIdx : SrcIndices) { 3370 if (SrcIdx == -1) break; 3371 const MCOperand &Src = Inst.getOperand(SrcIdx); 3372 if (Src.isReg()) { 3373 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3374 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3375 Error(getRegLoc(SrcReg, Operands), 3376 "destination must be different than all sources"); 3377 return false; 3378 } 3379 } 3380 } 3381 3382 return true; 3383 } 3384 3385 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3386 3387 const unsigned Opc = Inst.getOpcode(); 3388 const MCInstrDesc &Desc = MII.get(Opc); 3389 3390 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3391 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3392 assert(ClampIdx != -1); 3393 return Inst.getOperand(ClampIdx).getImm() == 0; 3394 } 3395 3396 return true; 3397 } 3398 3399 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3400 3401 const unsigned Opc = Inst.getOpcode(); 3402 const MCInstrDesc &Desc = MII.get(Opc); 3403 3404 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3405 return true; 3406 3407 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3408 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3409 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3410 3411 assert(VDataIdx != -1); 3412 3413 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3414 return true; 3415 3416 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3417 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3418 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3419 if (DMask == 0) 3420 DMask = 1; 3421 3422 unsigned DataSize = 3423 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3424 if (hasPackedD16()) { 3425 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3426 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3427 DataSize = (DataSize + 1) / 2; 3428 } 3429 3430 return (VDataSize / 4) == DataSize + TFESize; 3431 } 3432 3433 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3434 const unsigned Opc = Inst.getOpcode(); 3435 const MCInstrDesc &Desc = MII.get(Opc); 3436 3437 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3438 return true; 3439 3440 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3441 3442 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3443 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3444 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3445 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3446 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3447 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3448 3449 assert(VAddr0Idx != -1); 3450 assert(SrsrcIdx != -1); 3451 assert(SrsrcIdx > VAddr0Idx); 3452 3453 if (DimIdx == -1) 3454 return true; // intersect_ray 3455 3456 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3457 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3458 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3459 unsigned ActualAddrSize = 3460 IsNSA ? SrsrcIdx - VAddr0Idx 3461 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3462 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3463 3464 unsigned ExpectedAddrSize = 3465 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3466 3467 if (!IsNSA) { 3468 if (ExpectedAddrSize > 8) 3469 ExpectedAddrSize = 16; 3470 3471 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3472 // This provides backward compatibility for assembly created 3473 // before 160b/192b/224b types were directly supported. 3474 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3475 return true; 3476 } 3477 3478 return ActualAddrSize == ExpectedAddrSize; 3479 } 3480 3481 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3482 3483 const unsigned Opc = Inst.getOpcode(); 3484 const MCInstrDesc &Desc = MII.get(Opc); 3485 3486 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3487 return true; 3488 if (!Desc.mayLoad() || !Desc.mayStore()) 3489 return true; // Not atomic 3490 3491 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3492 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3493 3494 // This is an incomplete check because image_atomic_cmpswap 3495 // may only use 0x3 and 0xf while other atomic operations 3496 // may use 0x1 and 0x3. However these limitations are 3497 // verified when we check that dmask matches dst size. 3498 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3499 } 3500 3501 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3502 3503 const unsigned Opc = Inst.getOpcode(); 3504 const MCInstrDesc &Desc = MII.get(Opc); 3505 3506 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3507 return true; 3508 3509 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3510 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3511 3512 // GATHER4 instructions use dmask in a different fashion compared to 3513 // other MIMG instructions. The only useful DMASK values are 3514 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3515 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3516 // this. 3517 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3518 } 3519 3520 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3521 const unsigned Opc = Inst.getOpcode(); 3522 const MCInstrDesc &Desc = MII.get(Opc); 3523 3524 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3525 return true; 3526 3527 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3528 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3529 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3530 3531 if (!BaseOpcode->MSAA) 3532 return true; 3533 3534 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3535 assert(DimIdx != -1); 3536 3537 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3538 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3539 3540 return DimInfo->MSAA; 3541 } 3542 3543 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3544 { 3545 switch (Opcode) { 3546 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3547 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3548 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3549 return true; 3550 default: 3551 return false; 3552 } 3553 } 3554 3555 // movrels* opcodes should only allow VGPRS as src0. 3556 // This is specified in .td description for vop1/vop3, 3557 // but sdwa is handled differently. See isSDWAOperand. 3558 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3559 const OperandVector &Operands) { 3560 3561 const unsigned Opc = Inst.getOpcode(); 3562 const MCInstrDesc &Desc = MII.get(Opc); 3563 3564 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3565 return true; 3566 3567 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3568 assert(Src0Idx != -1); 3569 3570 SMLoc ErrLoc; 3571 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3572 if (Src0.isReg()) { 3573 auto Reg = mc2PseudoReg(Src0.getReg()); 3574 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3575 if (!isSGPR(Reg, TRI)) 3576 return true; 3577 ErrLoc = getRegLoc(Reg, Operands); 3578 } else { 3579 ErrLoc = getConstLoc(Operands); 3580 } 3581 3582 Error(ErrLoc, "source operand must be a VGPR"); 3583 return false; 3584 } 3585 3586 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3587 const OperandVector &Operands) { 3588 3589 const unsigned Opc = Inst.getOpcode(); 3590 3591 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3592 return true; 3593 3594 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3595 assert(Src0Idx != -1); 3596 3597 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3598 if (!Src0.isReg()) 3599 return true; 3600 3601 auto Reg = mc2PseudoReg(Src0.getReg()); 3602 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3603 if (isSGPR(Reg, TRI)) { 3604 Error(getRegLoc(Reg, Operands), 3605 "source operand must be either a VGPR or an inline constant"); 3606 return false; 3607 } 3608 3609 return true; 3610 } 3611 3612 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3613 switch (Inst.getOpcode()) { 3614 default: 3615 return true; 3616 case V_DIV_SCALE_F32_gfx6_gfx7: 3617 case V_DIV_SCALE_F32_vi: 3618 case V_DIV_SCALE_F32_gfx10: 3619 case V_DIV_SCALE_F64_gfx6_gfx7: 3620 case V_DIV_SCALE_F64_vi: 3621 case V_DIV_SCALE_F64_gfx10: 3622 break; 3623 } 3624 3625 // TODO: Check that src0 = src1 or src2. 
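// The loop below rejects the ABS (|...|) floating-point modifier on any of
// the source operands of these opcodes.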
3626 3627 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3628 AMDGPU::OpName::src1_modifiers, 3629 AMDGPU::OpName::src2_modifiers}) { 3630 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3631 .getImm() & 3632 SISrcMods::ABS) { 3633 return false; 3634 } 3635 } 3636 3637 return true; 3638 } 3639 3640 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3641 3642 const unsigned Opc = Inst.getOpcode(); 3643 const MCInstrDesc &Desc = MII.get(Opc); 3644 3645 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3646 return true; 3647 3648 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3649 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3650 if (isCI() || isSI()) 3651 return false; 3652 } 3653 3654 return true; 3655 } 3656 3657 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3658 const unsigned Opc = Inst.getOpcode(); 3659 const MCInstrDesc &Desc = MII.get(Opc); 3660 3661 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3662 return true; 3663 3664 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3665 if (DimIdx < 0) 3666 return true; 3667 3668 long Imm = Inst.getOperand(DimIdx).getImm(); 3669 if (Imm < 0 || Imm >= 8) 3670 return false; 3671 3672 return true; 3673 } 3674 3675 static bool IsRevOpcode(const unsigned Opcode) 3676 { 3677 switch (Opcode) { 3678 case AMDGPU::V_SUBREV_F32_e32: 3679 case AMDGPU::V_SUBREV_F32_e64: 3680 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3681 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3682 case AMDGPU::V_SUBREV_F32_e32_vi: 3683 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3684 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3685 case AMDGPU::V_SUBREV_F32_e64_vi: 3686 3687 case AMDGPU::V_SUBREV_CO_U32_e32: 3688 case AMDGPU::V_SUBREV_CO_U32_e64: 3689 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3690 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3691 3692 case AMDGPU::V_SUBBREV_U32_e32: 3693 case AMDGPU::V_SUBBREV_U32_e64: 3694 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3695 case AMDGPU::V_SUBBREV_U32_e32_vi: 3696 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3697 case AMDGPU::V_SUBBREV_U32_e64_vi: 3698 3699 case AMDGPU::V_SUBREV_U32_e32: 3700 case AMDGPU::V_SUBREV_U32_e64: 3701 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3702 case AMDGPU::V_SUBREV_U32_e32_vi: 3703 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3704 case AMDGPU::V_SUBREV_U32_e64_vi: 3705 3706 case AMDGPU::V_SUBREV_F16_e32: 3707 case AMDGPU::V_SUBREV_F16_e64: 3708 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3709 case AMDGPU::V_SUBREV_F16_e32_vi: 3710 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3711 case AMDGPU::V_SUBREV_F16_e64_vi: 3712 3713 case AMDGPU::V_SUBREV_U16_e32: 3714 case AMDGPU::V_SUBREV_U16_e64: 3715 case AMDGPU::V_SUBREV_U16_e32_vi: 3716 case AMDGPU::V_SUBREV_U16_e64_vi: 3717 3718 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3719 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3720 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3721 3722 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3723 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3724 3725 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3726 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3727 3728 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3729 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3730 3731 case AMDGPU::V_LSHRREV_B32_e32: 3732 case AMDGPU::V_LSHRREV_B32_e64: 3733 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3734 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3735 case AMDGPU::V_LSHRREV_B32_e32_vi: 3736 case AMDGPU::V_LSHRREV_B32_e64_vi: 3737 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3738 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3739 3740 case AMDGPU::V_ASHRREV_I32_e32: 3741 case 
AMDGPU::V_ASHRREV_I32_e64: 3742 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3743 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3744 case AMDGPU::V_ASHRREV_I32_e32_vi: 3745 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3746 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3747 case AMDGPU::V_ASHRREV_I32_e64_vi: 3748 3749 case AMDGPU::V_LSHLREV_B32_e32: 3750 case AMDGPU::V_LSHLREV_B32_e64: 3751 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3752 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3753 case AMDGPU::V_LSHLREV_B32_e32_vi: 3754 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3755 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3756 case AMDGPU::V_LSHLREV_B32_e64_vi: 3757 3758 case AMDGPU::V_LSHLREV_B16_e32: 3759 case AMDGPU::V_LSHLREV_B16_e64: 3760 case AMDGPU::V_LSHLREV_B16_e32_vi: 3761 case AMDGPU::V_LSHLREV_B16_e64_vi: 3762 case AMDGPU::V_LSHLREV_B16_gfx10: 3763 3764 case AMDGPU::V_LSHRREV_B16_e32: 3765 case AMDGPU::V_LSHRREV_B16_e64: 3766 case AMDGPU::V_LSHRREV_B16_e32_vi: 3767 case AMDGPU::V_LSHRREV_B16_e64_vi: 3768 case AMDGPU::V_LSHRREV_B16_gfx10: 3769 3770 case AMDGPU::V_ASHRREV_I16_e32: 3771 case AMDGPU::V_ASHRREV_I16_e64: 3772 case AMDGPU::V_ASHRREV_I16_e32_vi: 3773 case AMDGPU::V_ASHRREV_I16_e64_vi: 3774 case AMDGPU::V_ASHRREV_I16_gfx10: 3775 3776 case AMDGPU::V_LSHLREV_B64_e64: 3777 case AMDGPU::V_LSHLREV_B64_gfx10: 3778 case AMDGPU::V_LSHLREV_B64_vi: 3779 3780 case AMDGPU::V_LSHRREV_B64_e64: 3781 case AMDGPU::V_LSHRREV_B64_gfx10: 3782 case AMDGPU::V_LSHRREV_B64_vi: 3783 3784 case AMDGPU::V_ASHRREV_I64_e64: 3785 case AMDGPU::V_ASHRREV_I64_gfx10: 3786 case AMDGPU::V_ASHRREV_I64_vi: 3787 3788 case AMDGPU::V_PK_LSHLREV_B16: 3789 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3790 case AMDGPU::V_PK_LSHLREV_B16_vi: 3791 3792 case AMDGPU::V_PK_LSHRREV_B16: 3793 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3794 case AMDGPU::V_PK_LSHRREV_B16_vi: 3795 case AMDGPU::V_PK_ASHRREV_I16: 3796 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3797 case AMDGPU::V_PK_ASHRREV_I16_vi: 3798 return true; 3799 default: 3800 return false; 3801 } 3802 } 3803 3804 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3805 3806 using namespace SIInstrFlags; 3807 const unsigned Opcode = Inst.getOpcode(); 3808 const MCInstrDesc &Desc = MII.get(Opcode); 3809 3810 // lds_direct register is defined so that it can be used 3811 // with 9-bit operands only. Ignore encodings which do not accept these. 
3812 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3813 if ((Desc.TSFlags & Enc) == 0) 3814 return None; 3815 3816 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3817 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3818 if (SrcIdx == -1) 3819 break; 3820 const auto &Src = Inst.getOperand(SrcIdx); 3821 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3822 3823 if (isGFX90A()) 3824 return StringRef("lds_direct is not supported on this GPU"); 3825 3826 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3827 return StringRef("lds_direct cannot be used with this instruction"); 3828 3829 if (SrcName != OpName::src0) 3830 return StringRef("lds_direct may be used as src0 only"); 3831 } 3832 } 3833 3834 return None; 3835 } 3836 3837 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3838 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3839 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3840 if (Op.isFlatOffset()) 3841 return Op.getStartLoc(); 3842 } 3843 return getLoc(); 3844 } 3845 3846 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3847 const OperandVector &Operands) { 3848 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3849 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3850 return true; 3851 3852 auto Opcode = Inst.getOpcode(); 3853 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3854 assert(OpNum != -1); 3855 3856 const auto &Op = Inst.getOperand(OpNum); 3857 if (!hasFlatOffsets() && Op.getImm() != 0) { 3858 Error(getFlatOffsetLoc(Operands), 3859 "flat offset modifier is not supported on this GPU"); 3860 return false; 3861 } 3862 3863 // For FLAT segment the offset must be positive; 3864 // MSB is ignored and forced to zero. 3865 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3866 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3867 if (!isIntN(OffsetSize, Op.getImm())) { 3868 Error(getFlatOffsetLoc(Operands), 3869 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3870 return false; 3871 } 3872 } else { 3873 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3874 if (!isUIntN(OffsetSize, Op.getImm())) { 3875 Error(getFlatOffsetLoc(Operands), 3876 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3877 return false; 3878 } 3879 } 3880 3881 return true; 3882 } 3883 3884 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3885 // Start with second operand because SMEM Offset cannot be dst or src0. 
3886 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3887 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3888 if (Op.isSMEMOffset()) 3889 return Op.getStartLoc(); 3890 } 3891 return getLoc(); 3892 } 3893 3894 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3895 const OperandVector &Operands) { 3896 if (isCI() || isSI()) 3897 return true; 3898 3899 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3900 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3901 return true; 3902 3903 auto Opcode = Inst.getOpcode(); 3904 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3905 if (OpNum == -1) 3906 return true; 3907 3908 const auto &Op = Inst.getOperand(OpNum); 3909 if (!Op.isImm()) 3910 return true; 3911 3912 uint64_t Offset = Op.getImm(); 3913 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3914 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3915 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3916 return true; 3917 3918 Error(getSMEMOffsetLoc(Operands), 3919 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3920 "expected a 21-bit signed offset"); 3921 3922 return false; 3923 } 3924 3925 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3926 unsigned Opcode = Inst.getOpcode(); 3927 const MCInstrDesc &Desc = MII.get(Opcode); 3928 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3929 return true; 3930 3931 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3932 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3933 3934 const int OpIndices[] = { Src0Idx, Src1Idx }; 3935 3936 unsigned NumExprs = 0; 3937 unsigned NumLiterals = 0; 3938 uint32_t LiteralValue; 3939 3940 for (int OpIdx : OpIndices) { 3941 if (OpIdx == -1) break; 3942 3943 const MCOperand &MO = Inst.getOperand(OpIdx); 3944 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3945 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3946 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3947 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3948 if (NumLiterals == 0 || LiteralValue != Value) { 3949 LiteralValue = Value; 3950 ++NumLiterals; 3951 } 3952 } else if (MO.isExpr()) { 3953 ++NumExprs; 3954 } 3955 } 3956 } 3957 3958 return NumLiterals + NumExprs <= 1; 3959 } 3960 3961 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3962 const unsigned Opc = Inst.getOpcode(); 3963 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3964 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3965 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3966 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3967 3968 if (OpSel & ~3) 3969 return false; 3970 } 3971 return true; 3972 } 3973 3974 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3975 const OperandVector &Operands) { 3976 const unsigned Opc = Inst.getOpcode(); 3977 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3978 if (DppCtrlIdx < 0) 3979 return true; 3980 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3981 3982 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3983 // DPP64 is supported for row_newbcast only. 
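// A 64-bit src0 is recognized by the presence of a sub1 sub-register;
// plain 32-bit VGPRs have no sub-registers and are not restricted here.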
3984 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3985 if (Src0Idx >= 0 && 3986 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3987 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3988 Error(S, "64 bit dpp only supports row_newbcast"); 3989 return false; 3990 } 3991 } 3992 3993 return true; 3994 } 3995 3996 // Check if VCC register matches wavefront size 3997 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3998 auto FB = getFeatureBits(); 3999 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4000 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4001 } 4002 4003 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4004 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4005 const OperandVector &Operands) { 4006 unsigned Opcode = Inst.getOpcode(); 4007 const MCInstrDesc &Desc = MII.get(Opcode); 4008 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4009 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4010 ImmIdx == -1) 4011 return true; 4012 4013 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4014 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4015 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4016 4017 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4018 4019 unsigned NumExprs = 0; 4020 unsigned NumLiterals = 0; 4021 uint32_t LiteralValue; 4022 4023 for (int OpIdx : OpIndices) { 4024 if (OpIdx == -1) 4025 continue; 4026 4027 const MCOperand &MO = Inst.getOperand(OpIdx); 4028 if (!MO.isImm() && !MO.isExpr()) 4029 continue; 4030 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4031 continue; 4032 4033 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4034 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4035 Error(getConstLoc(Operands), 4036 "inline constants are not allowed for this operand"); 4037 return false; 4038 } 4039 4040 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4041 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4042 if (NumLiterals == 0 || LiteralValue != Value) { 4043 LiteralValue = Value; 4044 ++NumLiterals; 4045 } 4046 } else if (MO.isExpr()) { 4047 ++NumExprs; 4048 } 4049 } 4050 NumLiterals += NumExprs; 4051 4052 if (!NumLiterals) 4053 return true; 4054 4055 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4056 Error(getLitLoc(Operands), "literal operands are not supported"); 4057 return false; 4058 } 4059 4060 if (NumLiterals > 1) { 4061 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4062 return false; 4063 } 4064 4065 return true; 4066 } 4067 4068 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4069 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4070 const MCRegisterInfo *MRI) { 4071 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4072 if (OpIdx < 0) 4073 return -1; 4074 4075 const MCOperand &Op = Inst.getOperand(OpIdx); 4076 if (!Op.isReg()) 4077 return -1; 4078 4079 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4080 auto Reg = Sub ? Sub : Op.getReg(); 4081 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4082 return AGPR32.contains(Reg) ? 
1 : 0; 4083 } 4084 4085 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4086 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4087 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4088 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4089 SIInstrFlags::DS)) == 0) 4090 return true; 4091 4092 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4093 : AMDGPU::OpName::vdata; 4094 4095 const MCRegisterInfo *MRI = getMRI(); 4096 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4097 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4098 4099 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4100 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4101 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4102 return false; 4103 } 4104 4105 auto FB = getFeatureBits(); 4106 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4107 if (DataAreg < 0 || DstAreg < 0) 4108 return true; 4109 return DstAreg == DataAreg; 4110 } 4111 4112 return DstAreg < 1 && DataAreg < 1; 4113 } 4114 4115 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4116 auto FB = getFeatureBits(); 4117 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4118 return true; 4119 4120 const MCRegisterInfo *MRI = getMRI(); 4121 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4122 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4123 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4124 const MCOperand &Op = Inst.getOperand(I); 4125 if (!Op.isReg()) 4126 continue; 4127 4128 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4129 if (!Sub) 4130 continue; 4131 4132 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4133 return false; 4134 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4135 return false; 4136 } 4137 4138 return true; 4139 } 4140 4141 // gfx90a has an undocumented limitation: 4142 // DS_GWS opcodes must use even aligned registers. 4143 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4144 const OperandVector &Operands) { 4145 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4146 return true; 4147 4148 int Opc = Inst.getOpcode(); 4149 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4150 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4151 return true; 4152 4153 const MCRegisterInfo *MRI = getMRI(); 4154 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4155 int Data0Pos = 4156 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4157 assert(Data0Pos != -1); 4158 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4159 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4160 if (RegIdx & 1) { 4161 SMLoc RegLoc = getRegLoc(Reg, Operands); 4162 Error(RegLoc, "vgpr must be even aligned"); 4163 return false; 4164 } 4165 4166 return true; 4167 } 4168 4169 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4170 const OperandVector &Operands, 4171 const SMLoc &IDLoc) { 4172 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4173 AMDGPU::OpName::cpol); 4174 if (CPolPos == -1) 4175 return true; 4176 4177 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4178 4179 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4180 if ((TSFlags & (SIInstrFlags::SMRD)) && 4181 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4182 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4183 return false; 4184 } 4185 4186 if (isGFX90A() && (CPol & CPol::SCC)) { 4187 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4188 StringRef CStr(S.getPointer()); 4189 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4190 Error(S, "scc is not supported on this GPU"); 4191 return false; 4192 } 4193 4194 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4195 return true; 4196 4197 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4198 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4199 Error(IDLoc, "instruction must use glc"); 4200 return false; 4201 } 4202 } else { 4203 if (CPol & CPol::GLC) { 4204 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4205 StringRef CStr(S.getPointer()); 4206 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4207 Error(S, "instruction must not use glc"); 4208 return false; 4209 } 4210 } 4211 4212 return true; 4213 } 4214 4215 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4216 const SMLoc &IDLoc, 4217 const OperandVector &Operands) { 4218 if (auto ErrMsg = validateLdsDirect(Inst)) { 4219 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4220 return false; 4221 } 4222 if (!validateSOPLiteral(Inst)) { 4223 Error(getLitLoc(Operands), 4224 "only one literal operand is allowed"); 4225 return false; 4226 } 4227 if (!validateVOPLiteral(Inst, Operands)) { 4228 return false; 4229 } 4230 if (!validateConstantBusLimitations(Inst, Operands)) { 4231 return false; 4232 } 4233 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4234 return false; 4235 } 4236 if (!validateIntClampSupported(Inst)) { 4237 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4238 "integer clamping is not supported on this GPU"); 4239 return false; 4240 } 4241 if (!validateOpSel(Inst)) { 4242 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4243 "invalid op_sel operand"); 4244 return false; 4245 } 4246 if (!validateDPP(Inst, Operands)) { 4247 return false; 4248 } 4249 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4250 if (!validateMIMGD16(Inst)) { 4251 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4252 "d16 modifier is not supported on this GPU"); 4253 return false; 4254 } 4255 if (!validateMIMGDim(Inst)) { 4256 Error(IDLoc, "dim modifier is required on this GPU"); 4257 return false; 4258 } 4259 if (!validateMIMGMSAA(Inst)) { 4260 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4261 "invalid dim; must be MSAA type"); 4262 return false; 4263 } 4264 if (!validateMIMGDataSize(Inst)) { 4265 Error(IDLoc, 4266 "image data size does not match dmask and tfe"); 4267 return false; 4268 } 4269 if (!validateMIMGAddrSize(Inst)) { 4270 Error(IDLoc, 4271 "image address size does not match dim and a16"); 4272 return false; 4273 } 4274 if (!validateMIMGAtomicDMask(Inst)) { 4275 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4276 "invalid atomic image dmask"); 4277 return false; 4278 } 4279 if (!validateMIMGGatherDMask(Inst)) { 4280 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4281 "invalid image_gather dmask: only one bit must be set"); 4282 return false; 4283 } 4284 if (!validateMovrels(Inst, Operands)) { 4285 return false; 4286 } 4287 if (!validateFlatOffset(Inst, Operands)) { 4288 return false; 4289 } 4290 if (!validateSMEMOffset(Inst, Operands)) { 4291 return false; 4292 } 4293 if (!validateMAIAccWrite(Inst, Operands)) { 4294 return false; 4295 } 4296 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4297 return false; 4298 } 4299 4300 if (!validateAGPRLdSt(Inst)) { 4301 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4302 ? "invalid register class: data and dst should be all VGPR or AGPR" 4303 : "invalid register class: agpr loads and stores not supported on this GPU" 4304 ); 4305 return false; 4306 } 4307 if (!validateVGPRAlign(Inst)) { 4308 Error(IDLoc, 4309 "invalid register class: vgpr tuples must be 64 bit aligned"); 4310 return false; 4311 } 4312 if (!validateGWS(Inst, Operands)) { 4313 return false; 4314 } 4315 4316 if (!validateDivScale(Inst)) { 4317 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4318 return false; 4319 } 4320 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4321 return false; 4322 } 4323 4324 return true; 4325 } 4326 4327 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4328 const FeatureBitset &FBS, 4329 unsigned VariantID = 0); 4330 4331 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4332 const FeatureBitset &AvailableFeatures, 4333 unsigned VariantID); 4334 4335 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4336 const FeatureBitset &FBS) { 4337 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4338 } 4339 4340 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4341 const FeatureBitset &FBS, 4342 ArrayRef<unsigned> Variants) { 4343 for (auto Variant : Variants) { 4344 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4345 return true; 4346 } 4347 4348 return false; 4349 } 4350 4351 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4352 const SMLoc &IDLoc) { 4353 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4354 4355 // Check if requested instruction variant is supported. 4356 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4357 return false; 4358 4359 // This instruction is not supported. 4360 // Clear any other pending errors because they are no longer relevant. 4361 getParser().clearPendingErrors(); 4362 4363 // Requested instruction variant is not supported. 4364 // Check if any other variants are supported. 
4365 StringRef VariantName = getMatchedVariantName(); 4366 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4367 return Error(IDLoc, 4368 Twine(VariantName, 4369 " variant of this instruction is not supported")); 4370 } 4371 4372 // Finally check if this instruction is supported on any other GPU. 4373 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4374 return Error(IDLoc, "instruction not supported on this GPU"); 4375 } 4376 4377 // Instruction not supported on any GPU. Probably a typo. 4378 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4379 return Error(IDLoc, "invalid instruction" + Suggestion); 4380 } 4381 4382 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4383 OperandVector &Operands, 4384 MCStreamer &Out, 4385 uint64_t &ErrorInfo, 4386 bool MatchingInlineAsm) { 4387 MCInst Inst; 4388 unsigned Result = Match_Success; 4389 for (auto Variant : getMatchedVariants()) { 4390 uint64_t EI; 4391 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4392 Variant); 4393 // We order match statuses from least to most specific. We use most specific 4394 // status as resulting 4395 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4396 if ((R == Match_Success) || 4397 (R == Match_PreferE32) || 4398 (R == Match_MissingFeature && Result != Match_PreferE32) || 4399 (R == Match_InvalidOperand && Result != Match_MissingFeature 4400 && Result != Match_PreferE32) || 4401 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4402 && Result != Match_MissingFeature 4403 && Result != Match_PreferE32)) { 4404 Result = R; 4405 ErrorInfo = EI; 4406 } 4407 if (R == Match_Success) 4408 break; 4409 } 4410 4411 if (Result == Match_Success) { 4412 if (!validateInstruction(Inst, IDLoc, Operands)) { 4413 return true; 4414 } 4415 Inst.setLoc(IDLoc); 4416 Out.emitInstruction(Inst, getSTI()); 4417 return false; 4418 } 4419 4420 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4421 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4422 return true; 4423 } 4424 4425 switch (Result) { 4426 default: break; 4427 case Match_MissingFeature: 4428 // It has been verified that the specified instruction 4429 // mnemonic is valid. A match was found but it requires 4430 // features which are not supported on this GPU. 
4431 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4432 4433 case Match_InvalidOperand: { 4434 SMLoc ErrorLoc = IDLoc; 4435 if (ErrorInfo != ~0ULL) { 4436 if (ErrorInfo >= Operands.size()) { 4437 return Error(IDLoc, "too few operands for instruction"); 4438 } 4439 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4440 if (ErrorLoc == SMLoc()) 4441 ErrorLoc = IDLoc; 4442 } 4443 return Error(ErrorLoc, "invalid operand for instruction"); 4444 } 4445 4446 case Match_PreferE32: 4447 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4448 "should be encoded as e32"); 4449 case Match_MnemonicFail: 4450 llvm_unreachable("Invalid instructions should have been handled already"); 4451 } 4452 llvm_unreachable("Implement any new match types added!"); 4453 } 4454 4455 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4456 int64_t Tmp = -1; 4457 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4458 return true; 4459 } 4460 if (getParser().parseAbsoluteExpression(Tmp)) { 4461 return true; 4462 } 4463 Ret = static_cast<uint32_t>(Tmp); 4464 return false; 4465 } 4466 4467 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4468 uint32_t &Minor) { 4469 if (ParseAsAbsoluteExpression(Major)) 4470 return TokError("invalid major version"); 4471 4472 if (!trySkipToken(AsmToken::Comma)) 4473 return TokError("minor version number required, comma expected"); 4474 4475 if (ParseAsAbsoluteExpression(Minor)) 4476 return TokError("invalid minor version"); 4477 4478 return false; 4479 } 4480 4481 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4482 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4483 return TokError("directive only supported for amdgcn architecture"); 4484 4485 std::string TargetIDDirective; 4486 SMLoc TargetStart = getTok().getLoc(); 4487 if (getParser().parseEscapedString(TargetIDDirective)) 4488 return true; 4489 4490 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4491 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4492 return getParser().Error(TargetRange.Start, 4493 (Twine(".amdgcn_target directive's target id ") + 4494 Twine(TargetIDDirective) + 4495 Twine(" does not match the specified target id ") + 4496 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4497 4498 return false; 4499 } 4500 4501 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4502 return Error(Range.Start, "value out of range", Range); 4503 } 4504 4505 bool AMDGPUAsmParser::calculateGPRBlocks( 4506 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4507 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4508 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4509 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4510 // TODO(scott.linder): These calculations are duplicated from 4511 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
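// Register usage is reported to the hardware as allocation-granule block
// counts rather than raw register counts: the SGPR count is first adjusted
// for VCC/flat_scratch/XNACK (and the SGPR init bug where applicable), then
// both counts are converted with getNumVGPRBlocks/getNumSGPRBlocks below.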
4512 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4513 4514 unsigned NumVGPRs = NextFreeVGPR; 4515 unsigned NumSGPRs = NextFreeSGPR; 4516 4517 if (Version.Major >= 10) 4518 NumSGPRs = 0; 4519 else { 4520 unsigned MaxAddressableNumSGPRs = 4521 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4522 4523 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4524 NumSGPRs > MaxAddressableNumSGPRs) 4525 return OutOfRangeError(SGPRRange); 4526 4527 NumSGPRs += 4528 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4529 4530 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4531 NumSGPRs > MaxAddressableNumSGPRs) 4532 return OutOfRangeError(SGPRRange); 4533 4534 if (Features.test(FeatureSGPRInitBug)) 4535 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4536 } 4537 4538 VGPRBlocks = 4539 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4540 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4541 4542 return false; 4543 } 4544 4545 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4546 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4547 return TokError("directive only supported for amdgcn architecture"); 4548 4549 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4550 return TokError("directive only supported for amdhsa OS"); 4551 4552 StringRef KernelName; 4553 if (getParser().parseIdentifier(KernelName)) 4554 return true; 4555 4556 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4557 4558 StringSet<> Seen; 4559 4560 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4561 4562 SMRange VGPRRange; 4563 uint64_t NextFreeVGPR = 0; 4564 uint64_t AccumOffset = 0; 4565 SMRange SGPRRange; 4566 uint64_t NextFreeSGPR = 0; 4567 unsigned UserSGPRCount = 0; 4568 bool ReserveVCC = true; 4569 bool ReserveFlatScr = true; 4570 Optional<bool> EnableWavefrontSize32; 4571 4572 while (true) { 4573 while (trySkipToken(AsmToken::EndOfStatement)); 4574 4575 StringRef ID; 4576 SMRange IDRange = getTok().getLocRange(); 4577 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4578 return true; 4579 4580 if (ID == ".end_amdhsa_kernel") 4581 break; 4582 4583 if (Seen.find(ID) != Seen.end()) 4584 return TokError(".amdhsa_ directives cannot be repeated"); 4585 Seen.insert(ID); 4586 4587 SMLoc ValStart = getLoc(); 4588 int64_t IVal; 4589 if (getParser().parseAbsoluteExpression(IVal)) 4590 return true; 4591 SMLoc ValEnd = getLoc(); 4592 SMRange ValRange = SMRange(ValStart, ValEnd); 4593 4594 if (IVal < 0) 4595 return OutOfRangeError(ValRange); 4596 4597 uint64_t Val = IVal; 4598 4599 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4600 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4601 return OutOfRangeError(RANGE); \ 4602 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4603 4604 if (ID == ".amdhsa_group_segment_fixed_size") { 4605 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4606 return OutOfRangeError(ValRange); 4607 KD.group_segment_fixed_size = Val; 4608 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4609 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4610 return OutOfRangeError(ValRange); 4611 KD.private_segment_fixed_size = Val; 4612 } else if (ID == ".amdhsa_kernarg_size") { 4613 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4614 return OutOfRangeError(ValRange); 4615 KD.kernarg_size = Val; 4616 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4617 if (hasArchitectedFlatScratch()) 4618 return 
Error(IDRange.Start, 4619 "directive is not supported with architected flat scratch", 4620 IDRange); 4621 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4622 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4623 Val, ValRange); 4624 if (Val) 4625 UserSGPRCount += 4; 4626 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4627 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4628 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4629 ValRange); 4630 if (Val) 4631 UserSGPRCount += 2; 4632 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4633 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4635 ValRange); 4636 if (Val) 4637 UserSGPRCount += 2; 4638 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4639 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4640 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4641 Val, ValRange); 4642 if (Val) 4643 UserSGPRCount += 2; 4644 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4645 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4646 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4647 ValRange); 4648 if (Val) 4649 UserSGPRCount += 2; 4650 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4651 if (hasArchitectedFlatScratch()) 4652 return Error(IDRange.Start, 4653 "directive is not supported with architected flat scratch", 4654 IDRange); 4655 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4656 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4657 ValRange); 4658 if (Val) 4659 UserSGPRCount += 2; 4660 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4661 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4662 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4663 Val, ValRange); 4664 if (Val) 4665 UserSGPRCount += 1; 4666 } else if (ID == ".amdhsa_wavefront_size32") { 4667 if (IVersion.Major < 10) 4668 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4669 EnableWavefrontSize32 = Val; 4670 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4671 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4672 Val, ValRange); 4673 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4674 if (hasArchitectedFlatScratch()) 4675 return Error(IDRange.Start, 4676 "directive is not supported with architected flat scratch", 4677 IDRange); 4678 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4679 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4680 } else if (ID == ".amdhsa_enable_private_segment") { 4681 if (!hasArchitectedFlatScratch()) 4682 return Error( 4683 IDRange.Start, 4684 "directive is not supported without architected flat scratch", 4685 IDRange); 4686 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4687 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4688 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4689 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4690 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4691 ValRange); 4692 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4693 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4694 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4695 ValRange); 4696 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4697 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4698 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4699 ValRange); 4700 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4701 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4702 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4703 ValRange); 4704 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4705 
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4706 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4707 ValRange); 4708 } else if (ID == ".amdhsa_next_free_vgpr") { 4709 VGPRRange = ValRange; 4710 NextFreeVGPR = Val; 4711 } else if (ID == ".amdhsa_next_free_sgpr") { 4712 SGPRRange = ValRange; 4713 NextFreeSGPR = Val; 4714 } else if (ID == ".amdhsa_accum_offset") { 4715 if (!isGFX90A()) 4716 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4717 AccumOffset = Val; 4718 } else if (ID == ".amdhsa_reserve_vcc") { 4719 if (!isUInt<1>(Val)) 4720 return OutOfRangeError(ValRange); 4721 ReserveVCC = Val; 4722 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4723 if (IVersion.Major < 7) 4724 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4725 if (hasArchitectedFlatScratch()) 4726 return Error(IDRange.Start, 4727 "directive is not supported with architected flat scratch", 4728 IDRange); 4729 if (!isUInt<1>(Val)) 4730 return OutOfRangeError(ValRange); 4731 ReserveFlatScr = Val; 4732 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4733 if (IVersion.Major < 8) 4734 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4735 if (!isUInt<1>(Val)) 4736 return OutOfRangeError(ValRange); 4737 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4738 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4739 IDRange); 4740 } else if (ID == ".amdhsa_float_round_mode_32") { 4741 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4742 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4743 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4744 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4745 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4746 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4747 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4748 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4749 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4750 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4751 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4752 ValRange); 4753 } else if (ID == ".amdhsa_dx10_clamp") { 4754 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4755 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4756 } else if (ID == ".amdhsa_ieee_mode") { 4757 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4758 Val, ValRange); 4759 } else if (ID == ".amdhsa_fp16_overflow") { 4760 if (IVersion.Major < 9) 4761 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4763 ValRange); 4764 } else if (ID == ".amdhsa_tg_split") { 4765 if (!isGFX90A()) 4766 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4767 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4768 ValRange); 4769 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4770 if (IVersion.Major < 10) 4771 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4772 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4773 ValRange); 4774 } else if (ID == ".amdhsa_memory_ordered") { 4775 if (IVersion.Major < 10) 4776 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4777 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4778 ValRange); 4779 } else if (ID == ".amdhsa_forward_progress") { 4780 if (IVersion.Major < 10) 4781 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 
COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4783 ValRange); 4784 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4785 PARSE_BITS_ENTRY( 4786 KD.compute_pgm_rsrc2, 4787 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4788 ValRange); 4789 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4790 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4791 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4792 Val, ValRange); 4793 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4794 PARSE_BITS_ENTRY( 4795 KD.compute_pgm_rsrc2, 4796 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4797 ValRange); 4798 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4799 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4800 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4801 Val, ValRange); 4802 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4803 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4804 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4805 Val, ValRange); 4806 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4807 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4808 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4809 Val, ValRange); 4810 } else if (ID == ".amdhsa_exception_int_div_zero") { 4811 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4812 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4813 Val, ValRange); 4814 } else { 4815 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4816 } 4817 4818 #undef PARSE_BITS_ENTRY 4819 } 4820 4821 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4822 return TokError(".amdhsa_next_free_vgpr directive is required"); 4823 4824 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4825 return TokError(".amdhsa_next_free_sgpr directive is required"); 4826 4827 unsigned VGPRBlocks; 4828 unsigned SGPRBlocks; 4829 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4830 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4831 EnableWavefrontSize32, NextFreeVGPR, 4832 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4833 SGPRBlocks)) 4834 return true; 4835 4836 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4837 VGPRBlocks)) 4838 return OutOfRangeError(VGPRRange); 4839 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4840 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4841 4842 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4843 SGPRBlocks)) 4844 return OutOfRangeError(SGPRRange); 4845 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4846 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4847 SGPRBlocks); 4848 4849 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4850 return TokError("too many user SGPRs enabled"); 4851 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4852 UserSGPRCount); 4853 4854 if (isGFX90A()) { 4855 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4856 return TokError(".amdhsa_accum_offset directive is required"); 4857 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4858 return TokError("accum_offset should be in range [4..256] in " 4859 "increments of 4"); 4860 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4861 return TokError("accum_offset exceeds total VGPR allocation"); 4862 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4863 (AccumOffset / 4 - 1)); 4864 } 4865 4866 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4867 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, 
ReserveVCC, 4868 ReserveFlatScr); 4869 return false; 4870 } 4871 4872 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4873 uint32_t Major; 4874 uint32_t Minor; 4875 4876 if (ParseDirectiveMajorMinor(Major, Minor)) 4877 return true; 4878 4879 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4880 return false; 4881 } 4882 4883 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4884 uint32_t Major; 4885 uint32_t Minor; 4886 uint32_t Stepping; 4887 StringRef VendorName; 4888 StringRef ArchName; 4889 4890 // If this directive has no arguments, then use the ISA version for the 4891 // targeted GPU. 4892 if (isToken(AsmToken::EndOfStatement)) { 4893 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4894 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4895 ISA.Stepping, 4896 "AMD", "AMDGPU"); 4897 return false; 4898 } 4899 4900 if (ParseDirectiveMajorMinor(Major, Minor)) 4901 return true; 4902 4903 if (!trySkipToken(AsmToken::Comma)) 4904 return TokError("stepping version number required, comma expected"); 4905 4906 if (ParseAsAbsoluteExpression(Stepping)) 4907 return TokError("invalid stepping version"); 4908 4909 if (!trySkipToken(AsmToken::Comma)) 4910 return TokError("vendor name required, comma expected"); 4911 4912 if (!parseString(VendorName, "invalid vendor name")) 4913 return true; 4914 4915 if (!trySkipToken(AsmToken::Comma)) 4916 return TokError("arch name required, comma expected"); 4917 4918 if (!parseString(ArchName, "invalid arch name")) 4919 return true; 4920 4921 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4922 VendorName, ArchName); 4923 return false; 4924 } 4925 4926 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4927 amd_kernel_code_t &Header) { 4928 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4929 // assembly for backwards compatibility. 
4930 if (ID == "max_scratch_backing_memory_byte_size") { 4931 Parser.eatToEndOfStatement(); 4932 return false; 4933 } 4934 4935 SmallString<40> ErrStr; 4936 raw_svector_ostream Err(ErrStr); 4937 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4938 return TokError(Err.str()); 4939 } 4940 Lex(); 4941 4942 if (ID == "enable_wavefront_size32") { 4943 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4944 if (!isGFX10Plus()) 4945 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4946 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4947 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4948 } else { 4949 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4950 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4951 } 4952 } 4953 4954 if (ID == "wavefront_size") { 4955 if (Header.wavefront_size == 5) { 4956 if (!isGFX10Plus()) 4957 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4958 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4959 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4960 } else if (Header.wavefront_size == 6) { 4961 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4962 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4963 } 4964 } 4965 4966 if (ID == "enable_wgp_mode") { 4967 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4968 !isGFX10Plus()) 4969 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4970 } 4971 4972 if (ID == "enable_mem_ordered") { 4973 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4974 !isGFX10Plus()) 4975 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4976 } 4977 4978 if (ID == "enable_fwd_progress") { 4979 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4980 !isGFX10Plus()) 4981 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4982 } 4983 4984 return false; 4985 } 4986 4987 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4988 amd_kernel_code_t Header; 4989 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4990 4991 while (true) { 4992 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4993 // will set the current token to EndOfStatement. 
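// The loop below consumes a sequence of 'name = value' statements, for
// example (field values illustrative only):
//   wavefront_size = 6
//   enable_wavefront_size32 = 0
// terminated by .end_amd_kernel_code_t; the opening .amd_kernel_code_t token
// has already been consumed by ParseDirective().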
4994 while(trySkipToken(AsmToken::EndOfStatement)); 4995 4996 StringRef ID; 4997 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4998 return true; 4999 5000 if (ID == ".end_amd_kernel_code_t") 5001 break; 5002 5003 if (ParseAMDKernelCodeTValue(ID, Header)) 5004 return true; 5005 } 5006 5007 getTargetStreamer().EmitAMDKernelCodeT(Header); 5008 5009 return false; 5010 } 5011 5012 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5013 StringRef KernelName; 5014 if (!parseId(KernelName, "expected symbol name")) 5015 return true; 5016 5017 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5018 ELF::STT_AMDGPU_HSA_KERNEL); 5019 5020 KernelScope.initialize(getContext()); 5021 return false; 5022 } 5023 5024 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5025 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5026 return Error(getLoc(), 5027 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5028 "architectures"); 5029 } 5030 5031 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5032 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5033 return Error(getParser().getTok().getLoc(), "target id must match options"); 5034 5035 getTargetStreamer().EmitISAVersion(); 5036 Lex(); 5037 5038 return false; 5039 } 5040 5041 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5042 const char *AssemblerDirectiveBegin; 5043 const char *AssemblerDirectiveEnd; 5044 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5045 isHsaAbiVersion3Or4(&getSTI()) 5046 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5047 HSAMD::V3::AssemblerDirectiveEnd) 5048 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5049 HSAMD::AssemblerDirectiveEnd); 5050 5051 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5052 return Error(getLoc(), 5053 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5054 "not available on non-amdhsa OSes")).str()); 5055 } 5056 5057 std::string HSAMetadataString; 5058 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5059 HSAMetadataString)) 5060 return true; 5061 5062 if (isHsaAbiVersion3Or4(&getSTI())) { 5063 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5064 return Error(getLoc(), "invalid HSA metadata"); 5065 } else { 5066 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5067 return Error(getLoc(), "invalid HSA metadata"); 5068 } 5069 5070 return false; 5071 } 5072 5073 /// Common code to parse out a block of text (typically YAML) between start and 5074 /// end directives. 
5075 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5076 const char *AssemblerDirectiveEnd, 5077 std::string &CollectString) { 5078 5079 raw_string_ostream CollectStream(CollectString); 5080 5081 getLexer().setSkipSpace(false); 5082 5083 bool FoundEnd = false; 5084 while (!isToken(AsmToken::Eof)) { 5085 while (isToken(AsmToken::Space)) { 5086 CollectStream << getTokenStr(); 5087 Lex(); 5088 } 5089 5090 if (trySkipId(AssemblerDirectiveEnd)) { 5091 FoundEnd = true; 5092 break; 5093 } 5094 5095 CollectStream << Parser.parseStringToEndOfStatement() 5096 << getContext().getAsmInfo()->getSeparatorString(); 5097 5098 Parser.eatToEndOfStatement(); 5099 } 5100 5101 getLexer().setSkipSpace(true); 5102 5103 if (isToken(AsmToken::Eof) && !FoundEnd) { 5104 return TokError(Twine("expected directive ") + 5105 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5106 } 5107 5108 CollectStream.flush(); 5109 return false; 5110 } 5111 5112 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5113 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5114 std::string String; 5115 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5116 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5117 return true; 5118 5119 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5120 if (!PALMetadata->setFromString(String)) 5121 return Error(getLoc(), "invalid PAL metadata"); 5122 return false; 5123 } 5124 5125 /// Parse the assembler directive for old linear-format PAL metadata. 5126 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5127 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5128 return Error(getLoc(), 5129 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5130 "not available on non-amdpal OSes")).str()); 5131 } 5132 5133 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5134 PALMetadata->setLegacy(); 5135 for (;;) { 5136 uint32_t Key, Value; 5137 if (ParseAsAbsoluteExpression(Key)) { 5138 return TokError(Twine("invalid value in ") + 5139 Twine(PALMD::AssemblerDirective)); 5140 } 5141 if (!trySkipToken(AsmToken::Comma)) { 5142 return TokError(Twine("expected an even number of values in ") + 5143 Twine(PALMD::AssemblerDirective)); 5144 } 5145 if (ParseAsAbsoluteExpression(Value)) { 5146 return TokError(Twine("invalid value in ") + 5147 Twine(PALMD::AssemblerDirective)); 5148 } 5149 PALMetadata->setRegister(Key, Value); 5150 if (!trySkipToken(AsmToken::Comma)) 5151 break; 5152 } 5153 return false; 5154 } 5155 5156 /// ParseDirectiveAMDGPULDS 5157 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5158 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5159 if (getParser().checkForValidSection()) 5160 return true; 5161 5162 StringRef Name; 5163 SMLoc NameLoc = getLoc(); 5164 if (getParser().parseIdentifier(Name)) 5165 return TokError("expected identifier in directive"); 5166 5167 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5168 if (parseToken(AsmToken::Comma, "expected ','")) 5169 return true; 5170 5171 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5172 5173 int64_t Size; 5174 SMLoc SizeLoc = getLoc(); 5175 if (getParser().parseAbsoluteExpression(Size)) 5176 return true; 5177 if (Size < 0) 5178 return Error(SizeLoc, "size must be non-negative"); 5179 if (Size > LocalMemorySize) 5180 return Error(SizeLoc, "size is too large"); 5181 5182 int64_t Alignment = 4; 5183 if (trySkipToken(AsmToken::Comma)) { 5184 SMLoc AlignLoc = getLoc(); 5185 if 
(getParser().parseAbsoluteExpression(Alignment)) 5186 return true; 5187 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5188 return Error(AlignLoc, "alignment must be a power of two"); 5189 5190 // Alignment larger than the size of LDS is possible in theory, as long 5191 // as the linker manages to place to symbol at address 0, but we do want 5192 // to make sure the alignment fits nicely into a 32-bit integer. 5193 if (Alignment >= 1u << 31) 5194 return Error(AlignLoc, "alignment is too large"); 5195 } 5196 5197 if (parseToken(AsmToken::EndOfStatement, 5198 "unexpected token in '.amdgpu_lds' directive")) 5199 return true; 5200 5201 Symbol->redefineIfPossible(); 5202 if (!Symbol->isUndefined()) 5203 return Error(NameLoc, "invalid symbol redefinition"); 5204 5205 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5206 return false; 5207 } 5208 5209 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5210 StringRef IDVal = DirectiveID.getString(); 5211 5212 if (isHsaAbiVersion3Or4(&getSTI())) { 5213 if (IDVal == ".amdhsa_kernel") 5214 return ParseDirectiveAMDHSAKernel(); 5215 5216 // TODO: Restructure/combine with PAL metadata directive. 5217 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5218 return ParseDirectiveHSAMetadata(); 5219 } else { 5220 if (IDVal == ".hsa_code_object_version") 5221 return ParseDirectiveHSACodeObjectVersion(); 5222 5223 if (IDVal == ".hsa_code_object_isa") 5224 return ParseDirectiveHSACodeObjectISA(); 5225 5226 if (IDVal == ".amd_kernel_code_t") 5227 return ParseDirectiveAMDKernelCodeT(); 5228 5229 if (IDVal == ".amdgpu_hsa_kernel") 5230 return ParseDirectiveAMDGPUHsaKernel(); 5231 5232 if (IDVal == ".amd_amdgpu_isa") 5233 return ParseDirectiveISAVersion(); 5234 5235 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5236 return ParseDirectiveHSAMetadata(); 5237 } 5238 5239 if (IDVal == ".amdgcn_target") 5240 return ParseDirectiveAMDGCNTarget(); 5241 5242 if (IDVal == ".amdgpu_lds") 5243 return ParseDirectiveAMDGPULDS(); 5244 5245 if (IDVal == PALMD::AssemblerDirectiveBegin) 5246 return ParseDirectivePALMetadataBegin(); 5247 5248 if (IDVal == PALMD::AssemblerDirective) 5249 return ParseDirectivePALMetadata(); 5250 5251 return true; 5252 } 5253 5254 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5255 unsigned RegNo) { 5256 5257 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5258 R.isValid(); ++R) { 5259 if (*R == RegNo) 5260 return isGFX9Plus(); 5261 } 5262 5263 // GFX10 has 2 more SGPRs 104 and 105. 5264 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5265 R.isValid(); ++R) { 5266 if (*R == RegNo) 5267 return hasSGPR104_SGPR105(); 5268 } 5269 5270 switch (RegNo) { 5271 case AMDGPU::SRC_SHARED_BASE: 5272 case AMDGPU::SRC_SHARED_LIMIT: 5273 case AMDGPU::SRC_PRIVATE_BASE: 5274 case AMDGPU::SRC_PRIVATE_LIMIT: 5275 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5276 return isGFX9Plus(); 5277 case AMDGPU::TBA: 5278 case AMDGPU::TBA_LO: 5279 case AMDGPU::TBA_HI: 5280 case AMDGPU::TMA: 5281 case AMDGPU::TMA_LO: 5282 case AMDGPU::TMA_HI: 5283 return !isGFX9Plus(); 5284 case AMDGPU::XNACK_MASK: 5285 case AMDGPU::XNACK_MASK_LO: 5286 case AMDGPU::XNACK_MASK_HI: 5287 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5288 case AMDGPU::SGPR_NULL: 5289 return isGFX10Plus(); 5290 default: 5291 break; 5292 } 5293 5294 if (isCI()) 5295 return true; 5296 5297 if (isSI() || isGFX10Plus()) { 5298 // No flat_scr on SI. 
5299 // On GFX10 flat scratch is not a valid register operand and can only be 5300 // accessed with s_setreg/s_getreg. 5301 switch (RegNo) { 5302 case AMDGPU::FLAT_SCR: 5303 case AMDGPU::FLAT_SCR_LO: 5304 case AMDGPU::FLAT_SCR_HI: 5305 return false; 5306 default: 5307 return true; 5308 } 5309 } 5310 5311 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5312 // SI/CI have. 5313 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5314 R.isValid(); ++R) { 5315 if (*R == RegNo) 5316 return hasSGPR102_SGPR103(); 5317 } 5318 5319 return true; 5320 } 5321 5322 OperandMatchResultTy 5323 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5324 OperandMode Mode) { 5325 // Try to parse with a custom parser 5326 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5327 5328 // If we successfully parsed the operand or if there was an error parsing, 5329 // we are done. 5330 // 5331 // If we are parsing after we reach EndOfStatement then this means we 5332 // are appending default values to the Operands list. This is only done 5333 // by the custom parser, so we shouldn't continue on to the generic parsing. 5334 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5335 isToken(AsmToken::EndOfStatement)) 5336 return ResTy; 5337 5338 SMLoc RBraceLoc; 5339 SMLoc LBraceLoc = getLoc(); 5340 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5341 unsigned Prefix = Operands.size(); 5342 5343 for (;;) { 5344 auto Loc = getLoc(); 5345 ResTy = parseReg(Operands); 5346 if (ResTy == MatchOperand_NoMatch) 5347 Error(Loc, "expected a register"); 5348 if (ResTy != MatchOperand_Success) 5349 return MatchOperand_ParseFail; 5350 5351 RBraceLoc = getLoc(); 5352 if (trySkipToken(AsmToken::RBrac)) 5353 break; 5354 5355 if (!skipToken(AsmToken::Comma, 5356 "expected a comma or a closing square bracket")) { 5357 return MatchOperand_ParseFail; 5358 } 5359 } 5360 5361 if (Operands.size() - Prefix > 1) { 5362 Operands.insert(Operands.begin() + Prefix, 5363 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5364 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5365 } 5366 5367 return MatchOperand_Success; 5368 } 5369 5370 return parseRegOrImm(Operands); 5371 } 5372 5373 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5374 // Clear any forced encodings from the previous instruction.
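// A trailing _e64, _e32, _dpp or _sdwa suffix (handled just below) forces the
// corresponding encoding and is stripped from the returned mnemonic; for
// example, "v_add_f32_e64" is matched as "v_add_f32" with a 64-bit encoding
// forced.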
5375 setForcedEncodingSize(0); 5376 setForcedDPP(false); 5377 setForcedSDWA(false); 5378 5379 if (Name.endswith("_e64")) { 5380 setForcedEncodingSize(64); 5381 return Name.substr(0, Name.size() - 4); 5382 } else if (Name.endswith("_e32")) { 5383 setForcedEncodingSize(32); 5384 return Name.substr(0, Name.size() - 4); 5385 } else if (Name.endswith("_dpp")) { 5386 setForcedDPP(true); 5387 return Name.substr(0, Name.size() - 4); 5388 } else if (Name.endswith("_sdwa")) { 5389 setForcedSDWA(true); 5390 return Name.substr(0, Name.size() - 5); 5391 } 5392 return Name; 5393 } 5394 5395 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5396 StringRef Name, 5397 SMLoc NameLoc, OperandVector &Operands) { 5398 // Add the instruction mnemonic 5399 Name = parseMnemonicSuffix(Name); 5400 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5401 5402 bool IsMIMG = Name.startswith("image_"); 5403 5404 while (!trySkipToken(AsmToken::EndOfStatement)) { 5405 OperandMode Mode = OperandMode_Default; 5406 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5407 Mode = OperandMode_NSA; 5408 CPolSeen = 0; 5409 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5410 5411 if (Res != MatchOperand_Success) { 5412 checkUnsupportedInstruction(Name, NameLoc); 5413 if (!Parser.hasPendingError()) { 5414 // FIXME: use real operand location rather than the current location. 5415 StringRef Msg = 5416 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5417 "not a valid operand."; 5418 Error(getLoc(), Msg); 5419 } 5420 while (!trySkipToken(AsmToken::EndOfStatement)) { 5421 lex(); 5422 } 5423 return true; 5424 } 5425 5426 // Eat the comma or space if there is one. 5427 trySkipToken(AsmToken::Comma); 5428 } 5429 5430 return false; 5431 } 5432 5433 //===----------------------------------------------------------------------===// 5434 // Utility functions 5435 //===----------------------------------------------------------------------===// 5436 5437 OperandMatchResultTy 5438 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5439 5440 if (!trySkipId(Prefix, AsmToken::Colon)) 5441 return MatchOperand_NoMatch; 5442 5443 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5444 } 5445 5446 OperandMatchResultTy 5447 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5448 AMDGPUOperand::ImmTy ImmTy, 5449 bool (*ConvertResult)(int64_t&)) { 5450 SMLoc S = getLoc(); 5451 int64_t Value = 0; 5452 5453 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5454 if (Res != MatchOperand_Success) 5455 return Res; 5456 5457 if (ConvertResult && !ConvertResult(Value)) { 5458 Error(S, "invalid " + StringRef(Prefix) + " value."); 5459 } 5460 5461 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5462 return MatchOperand_Success; 5463 } 5464 5465 OperandMatchResultTy 5466 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5467 OperandVector &Operands, 5468 AMDGPUOperand::ImmTy ImmTy, 5469 bool (*ConvertResult)(int64_t&)) { 5470 SMLoc S = getLoc(); 5471 if (!trySkipId(Prefix, AsmToken::Colon)) 5472 return MatchOperand_NoMatch; 5473 5474 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5475 return MatchOperand_ParseFail; 5476 5477 unsigned Val = 0; 5478 const unsigned MaxSize = 4; 5479 5480 // FIXME: How to verify the number of elements matches the number of src 5481 // operands? 
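// Illustrative sketch of the loop below: each array element must be 0 or 1
// and is packed into bit I of Val, so an operand such as neg_lo:[1,0] yields
// Val = 0b01; at most MaxSize (4) elements are accepted.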
5482 for (int I = 0; ; ++I) { 5483 int64_t Op; 5484 SMLoc Loc = getLoc(); 5485 if (!parseExpr(Op)) 5486 return MatchOperand_ParseFail; 5487 5488 if (Op != 0 && Op != 1) { 5489 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5490 return MatchOperand_ParseFail; 5491 } 5492 5493 Val |= (Op << I); 5494 5495 if (trySkipToken(AsmToken::RBrac)) 5496 break; 5497 5498 if (I + 1 == MaxSize) { 5499 Error(getLoc(), "expected a closing square bracket"); 5500 return MatchOperand_ParseFail; 5501 } 5502 5503 if (!skipToken(AsmToken::Comma, "expected a comma")) 5504 return MatchOperand_ParseFail; 5505 } 5506 5507 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5508 return MatchOperand_Success; 5509 } 5510 5511 OperandMatchResultTy 5512 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5513 AMDGPUOperand::ImmTy ImmTy) { 5514 int64_t Bit; 5515 SMLoc S = getLoc(); 5516 5517 if (trySkipId(Name)) { 5518 Bit = 1; 5519 } else if (trySkipId("no", Name)) { 5520 Bit = 0; 5521 } else { 5522 return MatchOperand_NoMatch; 5523 } 5524 5525 if (Name == "r128" && !hasMIMG_R128()) { 5526 Error(S, "r128 modifier is not supported on this GPU"); 5527 return MatchOperand_ParseFail; 5528 } 5529 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5530 Error(S, "a16 modifier is not supported on this GPU"); 5531 return MatchOperand_ParseFail; 5532 } 5533 5534 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5535 ImmTy = AMDGPUOperand::ImmTyR128A16; 5536 5537 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5538 return MatchOperand_Success; 5539 } 5540 5541 OperandMatchResultTy 5542 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5543 unsigned CPolOn = 0; 5544 unsigned CPolOff = 0; 5545 SMLoc S = getLoc(); 5546 5547 if (trySkipId("glc")) 5548 CPolOn = AMDGPU::CPol::GLC; 5549 else if (trySkipId("noglc")) 5550 CPolOff = AMDGPU::CPol::GLC; 5551 else if (trySkipId("slc")) 5552 CPolOn = AMDGPU::CPol::SLC; 5553 else if (trySkipId("noslc")) 5554 CPolOff = AMDGPU::CPol::SLC; 5555 else if (trySkipId("dlc")) 5556 CPolOn = AMDGPU::CPol::DLC; 5557 else if (trySkipId("nodlc")) 5558 CPolOff = AMDGPU::CPol::DLC; 5559 else if (trySkipId("scc")) 5560 CPolOn = AMDGPU::CPol::SCC; 5561 else if (trySkipId("noscc")) 5562 CPolOff = AMDGPU::CPol::SCC; 5563 else 5564 return MatchOperand_NoMatch; 5565 5566 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5567 Error(S, "dlc modifier is not supported on this GPU"); 5568 return MatchOperand_ParseFail; 5569 } 5570 5571 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5572 Error(S, "scc modifier is not supported on this GPU"); 5573 return MatchOperand_ParseFail; 5574 } 5575 5576 if (CPolSeen & (CPolOn | CPolOff)) { 5577 Error(S, "duplicate cache policy modifier"); 5578 return MatchOperand_ParseFail; 5579 } 5580 5581 CPolSeen |= (CPolOn | CPolOff); 5582 5583 for (unsigned I = 1; I != Operands.size(); ++I) { 5584 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5585 if (Op.isCPol()) { 5586 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5587 return MatchOperand_Success; 5588 } 5589 } 5590 5591 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5592 AMDGPUOperand::ImmTyCPol)); 5593 5594 return MatchOperand_Success; 5595 } 5596 5597 static void addOptionalImmOperand( 5598 MCInst& Inst, const OperandVector& Operands, 5599 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5600 AMDGPUOperand::ImmTy ImmT, 5601 int64_t Default = 0) { 5602 auto i = OptionalIdx.find(ImmT); 5603 if (i != OptionalIdx.end()) { 
5604 unsigned Idx = i->second; 5605 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5606 } else { 5607 Inst.addOperand(MCOperand::createImm(Default)); 5608 } 5609 } 5610 5611 OperandMatchResultTy 5612 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5613 StringRef &Value, 5614 SMLoc &StringLoc) { 5615 if (!trySkipId(Prefix, AsmToken::Colon)) 5616 return MatchOperand_NoMatch; 5617 5618 StringLoc = getLoc(); 5619 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5620 : MatchOperand_ParseFail; 5621 } 5622 5623 //===----------------------------------------------------------------------===// 5624 // MTBUF format 5625 //===----------------------------------------------------------------------===// 5626 5627 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5628 int64_t MaxVal, 5629 int64_t &Fmt) { 5630 int64_t Val; 5631 SMLoc Loc = getLoc(); 5632 5633 auto Res = parseIntWithPrefix(Pref, Val); 5634 if (Res == MatchOperand_ParseFail) 5635 return false; 5636 if (Res == MatchOperand_NoMatch) 5637 return true; 5638 5639 if (Val < 0 || Val > MaxVal) { 5640 Error(Loc, Twine("out of range ", StringRef(Pref))); 5641 return false; 5642 } 5643 5644 Fmt = Val; 5645 return true; 5646 } 5647 5648 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5649 // values to live in a joint format operand in the MCInst encoding. 5650 OperandMatchResultTy 5651 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5652 using namespace llvm::AMDGPU::MTBUFFormat; 5653 5654 int64_t Dfmt = DFMT_UNDEF; 5655 int64_t Nfmt = NFMT_UNDEF; 5656 5657 // dfmt and nfmt can appear in either order, and each is optional. 5658 for (int I = 0; I < 2; ++I) { 5659 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5660 return MatchOperand_ParseFail; 5661 5662 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5663 return MatchOperand_ParseFail; 5664 } 5665 // Skip optional comma between dfmt/nfmt 5666 // but guard against 2 commas following each other. 5667 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5668 !peekToken().is(AsmToken::Comma)) { 5669 trySkipToken(AsmToken::Comma); 5670 } 5671 } 5672 5673 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5674 return MatchOperand_NoMatch; 5675 5676 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5677 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5678 5679 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5680 return MatchOperand_Success; 5681 } 5682 5683 OperandMatchResultTy 5684 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5685 using namespace llvm::AMDGPU::MTBUFFormat; 5686 5687 int64_t Fmt = UFMT_UNDEF; 5688 5689 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5690 return MatchOperand_ParseFail; 5691 5692 if (Fmt == UFMT_UNDEF) 5693 return MatchOperand_NoMatch; 5694 5695 Format = Fmt; 5696 return MatchOperand_Success; 5697 } 5698 5699 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5700 int64_t &Nfmt, 5701 StringRef FormatStr, 5702 SMLoc Loc) { 5703 using namespace llvm::AMDGPU::MTBUFFormat; 5704 int64_t Format; 5705 5706 Format = getDfmt(FormatStr); 5707 if (Format != DFMT_UNDEF) { 5708 Dfmt = Format; 5709 return true; 5710 } 5711 5712 Format = getNfmt(FormatStr, getSTI()); 5713 if (Format != NFMT_UNDEF) { 5714 Nfmt = Format; 5715 return true; 5716 } 5717 5718 Error(Loc, "unsupported format"); 5719 return false; 5720 } 5721 5722 OperandMatchResultTy 5723 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5724 SMLoc FormatLoc, 5725 int64_t &Format) { 5726 using namespace llvm::AMDGPU::MTBUFFormat; 5727 5728 int64_t Dfmt = DFMT_UNDEF; 5729 int64_t Nfmt = NFMT_UNDEF; 5730 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5731 return MatchOperand_ParseFail; 5732 5733 if (trySkipToken(AsmToken::Comma)) { 5734 StringRef Str; 5735 SMLoc Loc = getLoc(); 5736 if (!parseId(Str, "expected a format string") || 5737 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5738 return MatchOperand_ParseFail; 5739 } 5740 if (Dfmt == DFMT_UNDEF) { 5741 Error(Loc, "duplicate numeric format"); 5742 return MatchOperand_ParseFail; 5743 } else if (Nfmt == NFMT_UNDEF) { 5744 Error(Loc, "duplicate data format"); 5745 return MatchOperand_ParseFail; 5746 } 5747 } 5748 5749 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5750 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5751 5752 if (isGFX10Plus()) { 5753 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5754 if (Ufmt == UFMT_UNDEF) { 5755 Error(FormatLoc, "unsupported format"); 5756 return MatchOperand_ParseFail; 5757 } 5758 Format = Ufmt; 5759 } else { 5760 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5761 } 5762 5763 return MatchOperand_Success; 5764 } 5765 5766 OperandMatchResultTy 5767 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5768 SMLoc Loc, 5769 int64_t &Format) { 5770 using namespace llvm::AMDGPU::MTBUFFormat; 5771 5772 auto Id = getUnifiedFormat(FormatStr); 5773 if (Id == UFMT_UNDEF) 5774 return MatchOperand_NoMatch; 5775 5776 if (!isGFX10Plus()) { 5777 Error(Loc, "unified format is not supported on this GPU"); 5778 return MatchOperand_ParseFail; 5779 } 5780 5781 Format = Id; 5782 return MatchOperand_Success; 5783 } 5784 5785 OperandMatchResultTy 5786 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5787 using namespace llvm::AMDGPU::MTBUFFormat; 5788 SMLoc Loc = getLoc(); 5789 5790 if (!parseExpr(Format)) 5791 return MatchOperand_ParseFail; 5792 if (!isValidFormatEncoding(Format, getSTI())) { 5793 Error(Loc, "out of range format"); 5794 return MatchOperand_ParseFail; 5795 } 5796 5797 return MatchOperand_Success; 5798 } 5799 5800 OperandMatchResultTy 5801 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5802 using namespace llvm::AMDGPU::MTBUFFormat; 5803 5804 if (!trySkipId("format", AsmToken::Colon)) 5805 return MatchOperand_NoMatch; 5806 5807 if (trySkipToken(AsmToken::LBrac)) { 5808 StringRef FormatStr; 5809 SMLoc Loc = getLoc(); 5810 if (!parseId(FormatStr, "expected a format string")) 5811 return MatchOperand_ParseFail; 5812 5813 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5814 if (Res == MatchOperand_NoMatch) 5815 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5816 if (Res != MatchOperand_Success) 5817 return Res; 5818 5819 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5820 return MatchOperand_ParseFail; 5821 5822 return MatchOperand_Success; 5823 } 5824 5825 return parseNumericFormat(Format); 5826 } 5827 5828 OperandMatchResultTy 5829 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5830 using namespace llvm::AMDGPU::MTBUFFormat; 5831 5832 int64_t Format = getDefaultFormatEncoding(getSTI()); 5833 OperandMatchResultTy Res; 5834 SMLoc Loc = getLoc(); 5835 5836 // Parse legacy format syntax. 5837 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5838 if (Res == MatchOperand_ParseFail) 5839 return Res; 5840 5841 bool FormatFound = (Res == MatchOperand_Success); 5842 5843 Operands.push_back( 5844 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5845 5846 if (FormatFound) 5847 trySkipToken(AsmToken::Comma); 5848 5849 if (isToken(AsmToken::EndOfStatement)) { 5850 // We are expecting an soffset operand, 5851 // but let matcher handle the error. 5852 return MatchOperand_Success; 5853 } 5854 5855 // Parse soffset. 
5856 Res = parseRegOrImm(Operands); 5857 if (Res != MatchOperand_Success) 5858 return Res; 5859 5860 trySkipToken(AsmToken::Comma); 5861 5862 if (!FormatFound) { 5863 Res = parseSymbolicOrNumericFormat(Format); 5864 if (Res == MatchOperand_ParseFail) 5865 return Res; 5866 if (Res == MatchOperand_Success) { 5867 auto Size = Operands.size(); 5868 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5869 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5870 Op.setImm(Format); 5871 } 5872 return MatchOperand_Success; 5873 } 5874 5875 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5876 Error(getLoc(), "duplicate format"); 5877 return MatchOperand_ParseFail; 5878 } 5879 return MatchOperand_Success; 5880 } 5881 5882 //===----------------------------------------------------------------------===// 5883 // ds 5884 //===----------------------------------------------------------------------===// 5885 5886 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5887 const OperandVector &Operands) { 5888 OptionalImmIndexMap OptionalIdx; 5889 5890 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5891 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5892 5893 // Add the register arguments 5894 if (Op.isReg()) { 5895 Op.addRegOperands(Inst, 1); 5896 continue; 5897 } 5898 5899 // Handle optional arguments 5900 OptionalIdx[Op.getImmTy()] = i; 5901 } 5902 5903 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5904 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5905 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5906 5907 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5908 } 5909 5910 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5911 bool IsGdsHardcoded) { 5912 OptionalImmIndexMap OptionalIdx; 5913 5914 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5915 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5916 5917 // Add the register arguments 5918 if (Op.isReg()) { 5919 Op.addRegOperands(Inst, 1); 5920 continue; 5921 } 5922 5923 if (Op.isToken() && Op.getToken() == "gds") { 5924 IsGdsHardcoded = true; 5925 continue; 5926 } 5927 5928 // Handle optional arguments 5929 OptionalIdx[Op.getImmTy()] = i; 5930 } 5931 5932 AMDGPUOperand::ImmTy OffsetType = 5933 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5934 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5935 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5936 AMDGPUOperand::ImmTyOffset; 5937 5938 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5939 5940 if (!IsGdsHardcoded) { 5941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5942 } 5943 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5944 } 5945 5946 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5947 OptionalImmIndexMap OptionalIdx; 5948 5949 unsigned OperandIdx[4]; 5950 unsigned EnMask = 0; 5951 int SrcIdx = 0; 5952 5953 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5954 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5955 5956 // Add the register arguments 5957 if (Op.isReg()) { 5958 assert(SrcIdx < 4); 5959 OperandIdx[SrcIdx] = Inst.size(); 5960 Op.addRegOperands(Inst, 1); 5961 ++SrcIdx; 5962 continue; 5963 } 5964 5965 if (Op.isOff()) { 5966 assert(SrcIdx < 4); 5967 OperandIdx[SrcIdx] = Inst.size(); 5968 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5969 ++SrcIdx; 5970 continue; 5971 } 5972 5973 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5974 Op.addImmOperands(Inst, 1); 5975 continue; 5976 } 5977 5978 if (Op.isToken() && Op.getToken() == "done") 5979 continue; 5980 5981 // Handle optional arguments 5982 OptionalIdx[Op.getImmTy()] = i; 5983 } 5984 5985 assert(SrcIdx == 4); 5986 5987 bool Compr = false; 5988 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5989 Compr = true; 5990 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5991 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5992 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5993 } 5994 5995 for (auto i = 0; i < SrcIdx; ++i) { 5996 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5997 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5998 } 5999 } 6000 6001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6003 6004 Inst.addOperand(MCOperand::createImm(EnMask)); 6005 } 6006 6007 //===----------------------------------------------------------------------===// 6008 // s_waitcnt 6009 //===----------------------------------------------------------------------===// 6010 6011 static bool 6012 encodeCnt( 6013 const AMDGPU::IsaVersion ISA, 6014 int64_t &IntVal, 6015 int64_t CntVal, 6016 bool Saturate, 6017 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6018 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6019 { 6020 bool Failed = false; 6021 6022 IntVal = encode(ISA, IntVal, CntVal); 6023 if (CntVal != decode(ISA, IntVal)) { 6024 if (Saturate) { 6025 IntVal = encode(ISA, IntVal, -1); 6026 } else { 6027 Failed = true; 6028 } 6029 } 6030 return Failed; 6031 } 6032 6033 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6034 6035 SMLoc CntLoc = getLoc(); 6036 StringRef CntName = getTokenStr(); 6037 6038 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6039 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6040 return false; 6041 6042 int64_t CntVal; 6043 SMLoc ValLoc = getLoc(); 6044 if (!parseExpr(CntVal)) 6045 return false; 6046 6047 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6048 6049 bool Failed = true; 6050 bool Sat = CntName.endswith("_sat"); 6051 6052 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6053 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6054 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6055 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6056 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6057 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6058 } else { 6059 Error(CntLoc, "invalid counter name " + CntName); 6060 return false; 6061 } 6062 6063 if (Failed) { 6064 Error(ValLoc, "too large value for " + CntName); 6065 return false; 6066 } 6067 6068 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6069 return false; 6070 6071 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6072 if (isToken(AsmToken::EndOfStatement)) { 6073 Error(getLoc(), "expected a counter name"); 6074 return false; 6075 } 6076 } 6077 6078 return true; 6079 } 6080 6081 OperandMatchResultTy 6082 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6083 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6084 int64_t Waitcnt = getWaitcntBitMask(ISA); 6085 SMLoc S = getLoc(); 6086 6087 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6088 while (!isToken(AsmToken::EndOfStatement)) { 6089 if (!parseCnt(Waitcnt)) 6090 return MatchOperand_ParseFail; 6091 } 6092 } else { 6093 if (!parseExpr(Waitcnt)) 6094 return MatchOperand_ParseFail; 6095 } 6096 6097 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6098 return MatchOperand_Success; 6099 } 6100 6101 bool 6102 AMDGPUOperand::isSWaitCnt() const { 6103 return isImm(); 6104 } 6105 6106 //===----------------------------------------------------------------------===// 6107 // hwreg 6108 //===----------------------------------------------------------------------===// 6109 6110 bool 6111 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6112 OperandInfoTy &Offset, 6113 
OperandInfoTy &Width) { 6114 using namespace llvm::AMDGPU::Hwreg; 6115 6116 // The register may be specified by name or using a numeric code 6117 HwReg.Loc = getLoc(); 6118 if (isToken(AsmToken::Identifier) && 6119 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6120 HwReg.IsSymbolic = true; 6121 lex(); // skip register name 6122 } else if (!parseExpr(HwReg.Id, "a register name")) { 6123 return false; 6124 } 6125 6126 if (trySkipToken(AsmToken::RParen)) 6127 return true; 6128 6129 // parse optional params 6130 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6131 return false; 6132 6133 Offset.Loc = getLoc(); 6134 if (!parseExpr(Offset.Id)) 6135 return false; 6136 6137 if (!skipToken(AsmToken::Comma, "expected a comma")) 6138 return false; 6139 6140 Width.Loc = getLoc(); 6141 return parseExpr(Width.Id) && 6142 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6143 } 6144 6145 bool 6146 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6147 const OperandInfoTy &Offset, 6148 const OperandInfoTy &Width) { 6149 6150 using namespace llvm::AMDGPU::Hwreg; 6151 6152 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6153 Error(HwReg.Loc, 6154 "specified hardware register is not supported on this GPU"); 6155 return false; 6156 } 6157 if (!isValidHwreg(HwReg.Id)) { 6158 Error(HwReg.Loc, 6159 "invalid code of hardware register: only 6-bit values are legal"); 6160 return false; 6161 } 6162 if (!isValidHwregOffset(Offset.Id)) { 6163 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6164 return false; 6165 } 6166 if (!isValidHwregWidth(Width.Id)) { 6167 Error(Width.Loc, 6168 "invalid bitfield width: only values from 1 to 32 are legal"); 6169 return false; 6170 } 6171 return true; 6172 } 6173 6174 OperandMatchResultTy 6175 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6176 using namespace llvm::AMDGPU::Hwreg; 6177 6178 int64_t ImmVal = 0; 6179 SMLoc Loc = getLoc(); 6180 6181 if (trySkipId("hwreg", AsmToken::LParen)) { 6182 OperandInfoTy HwReg(ID_UNKNOWN_); 6183 OperandInfoTy Offset(OFFSET_DEFAULT_); 6184 OperandInfoTy Width(WIDTH_DEFAULT_); 6185 if (parseHwregBody(HwReg, Offset, Width) && 6186 validateHwreg(HwReg, Offset, Width)) { 6187 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6188 } else { 6189 return MatchOperand_ParseFail; 6190 } 6191 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6192 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6193 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6194 return MatchOperand_ParseFail; 6195 } 6196 } else { 6197 return MatchOperand_ParseFail; 6198 } 6199 6200 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6201 return MatchOperand_Success; 6202 } 6203 6204 bool AMDGPUOperand::isHwreg() const { 6205 return isImmTy(ImmTyHwreg); 6206 } 6207 6208 //===----------------------------------------------------------------------===// 6209 // sendmsg 6210 //===----------------------------------------------------------------------===// 6211 6212 bool 6213 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6214 OperandInfoTy &Op, 6215 OperandInfoTy &Stream) { 6216 using namespace llvm::AMDGPU::SendMsg; 6217 6218 Msg.Loc = getLoc(); 6219 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6220 Msg.IsSymbolic = true; 6221 lex(); // skip message name 6222 } else if (!parseExpr(Msg.Id, "a message name")) { 6223 return false; 6224 } 6225 6226 if (trySkipToken(AsmToken::Comma)) { 6227 Op.IsDefined = true; 
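// The operation name or an expression follows, as in the (illustrative)
// operand sendmsg(MSG_GS, GS_OP_EMIT, 0): GS_OP_EMIT is the operation and
// the trailing 0 is the optional stream id handled further below.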
6228 Op.Loc = getLoc(); 6229 if (isToken(AsmToken::Identifier) && 6230 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6231 lex(); // skip operation name 6232 } else if (!parseExpr(Op.Id, "an operation name")) { 6233 return false; 6234 } 6235 6236 if (trySkipToken(AsmToken::Comma)) { 6237 Stream.IsDefined = true; 6238 Stream.Loc = getLoc(); 6239 if (!parseExpr(Stream.Id)) 6240 return false; 6241 } 6242 } 6243 6244 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6245 } 6246 6247 bool 6248 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6249 const OperandInfoTy &Op, 6250 const OperandInfoTy &Stream) { 6251 using namespace llvm::AMDGPU::SendMsg; 6252 6253 // Validation strictness depends on whether the message is specified 6254 // in a symbolic or in a numeric form. In the latter case 6255 // only the encoding possibility is checked. 6256 bool Strict = Msg.IsSymbolic; 6257 6258 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6259 Error(Msg.Loc, "invalid message id"); 6260 return false; 6261 } 6262 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6263 if (Op.IsDefined) { 6264 Error(Op.Loc, "message does not support operations"); 6265 } else { 6266 Error(Msg.Loc, "missing message operation"); 6267 } 6268 return false; 6269 } 6270 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6271 Error(Op.Loc, "invalid operation id"); 6272 return false; 6273 } 6274 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6275 Error(Stream.Loc, "message operation does not support streams"); 6276 return false; 6277 } 6278 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6279 Error(Stream.Loc, "invalid message stream id"); 6280 return false; 6281 } 6282 return true; 6283 } 6284 6285 OperandMatchResultTy 6286 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6287 using namespace llvm::AMDGPU::SendMsg; 6288 6289 int64_t ImmVal = 0; 6290 SMLoc Loc = getLoc(); 6291 6292 if (trySkipId("sendmsg", AsmToken::LParen)) { 6293 OperandInfoTy Msg(ID_UNKNOWN_); 6294 OperandInfoTy Op(OP_NONE_); 6295 OperandInfoTy Stream(STREAM_ID_NONE_); 6296 if (parseSendMsgBody(Msg, Op, Stream) && 6297 validateSendMsg(Msg, Op, Stream)) { 6298 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6299 } else { 6300 return MatchOperand_ParseFail; 6301 } 6302 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6303 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6304 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6305 return MatchOperand_ParseFail; 6306 } 6307 } else { 6308 return MatchOperand_ParseFail; 6309 } 6310 6311 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6312 return MatchOperand_Success; 6313 } 6314 6315 bool AMDGPUOperand::isSendMsg() const { 6316 return isImmTy(ImmTySendMsg); 6317 } 6318 6319 //===----------------------------------------------------------------------===// 6320 // v_interp 6321 //===----------------------------------------------------------------------===// 6322 6323 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6324 StringRef Str; 6325 SMLoc S = getLoc(); 6326 6327 if (!parseId(Str)) 6328 return MatchOperand_NoMatch; 6329 6330 int Slot = StringSwitch<int>(Str) 6331 .Case("p10", 0) 6332 .Case("p20", 1) 6333 .Case("p0", 2) 6334 .Default(-1); 6335 6336 if (Slot == -1) { 6337 Error(S, "invalid interpolation slot"); 6338 return MatchOperand_ParseFail; 6339 } 6340 6341 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6342
AMDGPUOperand::ImmTyInterpSlot)); 6343 return MatchOperand_Success; 6344 } 6345 6346 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6347 StringRef Str; 6348 SMLoc S = getLoc(); 6349 6350 if (!parseId(Str)) 6351 return MatchOperand_NoMatch; 6352 6353 if (!Str.startswith("attr")) { 6354 Error(S, "invalid interpolation attribute"); 6355 return MatchOperand_ParseFail; 6356 } 6357 6358 StringRef Chan = Str.take_back(2); 6359 int AttrChan = StringSwitch<int>(Chan) 6360 .Case(".x", 0) 6361 .Case(".y", 1) 6362 .Case(".z", 2) 6363 .Case(".w", 3) 6364 .Default(-1); 6365 if (AttrChan == -1) { 6366 Error(S, "invalid or missing interpolation attribute channel"); 6367 return MatchOperand_ParseFail; 6368 } 6369 6370 Str = Str.drop_back(2).drop_front(4); 6371 6372 uint8_t Attr; 6373 if (Str.getAsInteger(10, Attr)) { 6374 Error(S, "invalid or missing interpolation attribute number"); 6375 return MatchOperand_ParseFail; 6376 } 6377 6378 if (Attr > 63) { 6379 Error(S, "out of bounds interpolation attribute number"); 6380 return MatchOperand_ParseFail; 6381 } 6382 6383 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6384 6385 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6386 AMDGPUOperand::ImmTyInterpAttr)); 6387 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6388 AMDGPUOperand::ImmTyAttrChan)); 6389 return MatchOperand_Success; 6390 } 6391 6392 //===----------------------------------------------------------------------===// 6393 // exp 6394 //===----------------------------------------------------------------------===// 6395 6396 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6397 using namespace llvm::AMDGPU::Exp; 6398 6399 StringRef Str; 6400 SMLoc S = getLoc(); 6401 6402 if (!parseId(Str)) 6403 return MatchOperand_NoMatch; 6404 6405 unsigned Id = getTgtId(Str); 6406 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6407 Error(S, (Id == ET_INVALID) ? 
6408 "invalid exp target" : 6409 "exp target is not supported on this GPU"); 6410 return MatchOperand_ParseFail; 6411 } 6412 6413 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6414 AMDGPUOperand::ImmTyExpTgt)); 6415 return MatchOperand_Success; 6416 } 6417 6418 //===----------------------------------------------------------------------===// 6419 // parser helpers 6420 //===----------------------------------------------------------------------===// 6421 6422 bool 6423 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6424 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6425 } 6426 6427 bool 6428 AMDGPUAsmParser::isId(const StringRef Id) const { 6429 return isId(getToken(), Id); 6430 } 6431 6432 bool 6433 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6434 return getTokenKind() == Kind; 6435 } 6436 6437 bool 6438 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6439 if (isId(Id)) { 6440 lex(); 6441 return true; 6442 } 6443 return false; 6444 } 6445 6446 bool 6447 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6448 if (isToken(AsmToken::Identifier)) { 6449 StringRef Tok = getTokenStr(); 6450 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6451 lex(); 6452 return true; 6453 } 6454 } 6455 return false; 6456 } 6457 6458 bool 6459 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6460 if (isId(Id) && peekToken().is(Kind)) { 6461 lex(); 6462 lex(); 6463 return true; 6464 } 6465 return false; 6466 } 6467 6468 bool 6469 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6470 if (isToken(Kind)) { 6471 lex(); 6472 return true; 6473 } 6474 return false; 6475 } 6476 6477 bool 6478 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6479 const StringRef ErrMsg) { 6480 if (!trySkipToken(Kind)) { 6481 Error(getLoc(), ErrMsg); 6482 return false; 6483 } 6484 return true; 6485 } 6486 6487 bool 6488 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6489 SMLoc S = getLoc(); 6490 6491 const MCExpr *Expr; 6492 if (Parser.parseExpression(Expr)) 6493 return false; 6494 6495 if (Expr->evaluateAsAbsolute(Imm)) 6496 return true; 6497 6498 if (Expected.empty()) { 6499 Error(S, "expected absolute expression"); 6500 } else { 6501 Error(S, Twine("expected ", Expected) + 6502 Twine(" or an absolute expression")); 6503 } 6504 return false; 6505 } 6506 6507 bool 6508 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6509 SMLoc S = getLoc(); 6510 6511 const MCExpr *Expr; 6512 if (Parser.parseExpression(Expr)) 6513 return false; 6514 6515 int64_t IntVal; 6516 if (Expr->evaluateAsAbsolute(IntVal)) { 6517 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6518 } else { 6519 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6520 } 6521 return true; 6522 } 6523 6524 bool 6525 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6526 if (isToken(AsmToken::String)) { 6527 Val = getToken().getStringContents(); 6528 lex(); 6529 return true; 6530 } else { 6531 Error(getLoc(), ErrMsg); 6532 return false; 6533 } 6534 } 6535 6536 bool 6537 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6538 if (isToken(AsmToken::Identifier)) { 6539 Val = getTokenStr(); 6540 lex(); 6541 return true; 6542 } else { 6543 if (!ErrMsg.empty()) 6544 Error(getLoc(), ErrMsg); 6545 return false; 6546 } 6547 } 6548 6549 AsmToken 6550 AMDGPUAsmParser::getToken() const { 6551 return Parser.getTok(); 6552 } 6553 6554 AsmToken 6555 
AMDGPUAsmParser::peekToken() { 6556 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6557 } 6558 6559 void 6560 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6561 auto TokCount = getLexer().peekTokens(Tokens); 6562 6563 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6564 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6565 } 6566 6567 AsmToken::TokenKind 6568 AMDGPUAsmParser::getTokenKind() const { 6569 return getLexer().getKind(); 6570 } 6571 6572 SMLoc 6573 AMDGPUAsmParser::getLoc() const { 6574 return getToken().getLoc(); 6575 } 6576 6577 StringRef 6578 AMDGPUAsmParser::getTokenStr() const { 6579 return getToken().getString(); 6580 } 6581 6582 void 6583 AMDGPUAsmParser::lex() { 6584 Parser.Lex(); 6585 } 6586 6587 SMLoc 6588 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6589 const OperandVector &Operands) const { 6590 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6591 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6592 if (Test(Op)) 6593 return Op.getStartLoc(); 6594 } 6595 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6596 } 6597 6598 SMLoc 6599 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6600 const OperandVector &Operands) const { 6601 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6602 return getOperandLoc(Test, Operands); 6603 } 6604 6605 SMLoc 6606 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6607 const OperandVector &Operands) const { 6608 auto Test = [=](const AMDGPUOperand& Op) { 6609 return Op.isRegKind() && Op.getReg() == Reg; 6610 }; 6611 return getOperandLoc(Test, Operands); 6612 } 6613 6614 SMLoc 6615 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6616 auto Test = [](const AMDGPUOperand& Op) { 6617 return Op.IsImmKindLiteral() || Op.isExpr(); 6618 }; 6619 return getOperandLoc(Test, Operands); 6620 } 6621 6622 SMLoc 6623 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6624 auto Test = [](const AMDGPUOperand& Op) { 6625 return Op.isImmKindConst(); 6626 }; 6627 return getOperandLoc(Test, Operands); 6628 } 6629 6630 //===----------------------------------------------------------------------===// 6631 // swizzle 6632 //===----------------------------------------------------------------------===// 6633 6634 LLVM_READNONE 6635 static unsigned 6636 encodeBitmaskPerm(const unsigned AndMask, 6637 const unsigned OrMask, 6638 const unsigned XorMask) { 6639 using namespace llvm::AMDGPU::Swizzle; 6640 6641 return BITMASK_PERM_ENC | 6642 (AndMask << BITMASK_AND_SHIFT) | 6643 (OrMask << BITMASK_OR_SHIFT) | 6644 (XorMask << BITMASK_XOR_SHIFT); 6645 } 6646 6647 bool 6648 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6649 const unsigned MinVal, 6650 const unsigned MaxVal, 6651 const StringRef ErrMsg, 6652 SMLoc &Loc) { 6653 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6654 return false; 6655 } 6656 Loc = getLoc(); 6657 if (!parseExpr(Op)) { 6658 return false; 6659 } 6660 if (Op < MinVal || Op > MaxVal) { 6661 Error(Loc, ErrMsg); 6662 return false; 6663 } 6664 6665 return true; 6666 } 6667 6668 bool 6669 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6670 const unsigned MinVal, 6671 const unsigned MaxVal, 6672 const StringRef ErrMsg) { 6673 SMLoc Loc; 6674 for (unsigned i = 0; i < OpNum; ++i) { 6675 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6676 return false; 6677 } 6678 6679 return true; 6680 } 6681 6682 bool 6683 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6684 using namespace llvm::AMDGPU::Swizzle; 6685 6686 int64_t Lane[LANE_NUM]; 6687 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6688 "expected a 2-bit lane id")) { 6689 Imm = QUAD_PERM_ENC; 6690 for (unsigned I = 0; I < LANE_NUM; ++I) { 6691 Imm |= Lane[I] << (LANE_SHIFT * I); 6692 } 6693 return true; 6694 } 6695 return false; 6696 } 6697 6698 bool 6699 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6700 using namespace llvm::AMDGPU::Swizzle; 6701 6702 SMLoc Loc; 6703 int64_t GroupSize; 6704 int64_t LaneIdx; 6705 6706 if (!parseSwizzleOperand(GroupSize, 6707 2, 32, 6708 "group size must be in the interval [2,32]", 6709 Loc)) { 6710 return false; 6711 } 6712 if (!isPowerOf2_64(GroupSize)) { 6713 Error(Loc, "group size must be a power of two"); 6714 return false; 6715 } 6716 if (parseSwizzleOperand(LaneIdx, 6717 0, GroupSize - 1, 6718 "lane id must be in the interval [0,group size - 1]", 6719 Loc)) { 6720 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6721 return true; 6722 } 6723 return false; 6724 } 6725 6726 bool 6727 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6728 using namespace llvm::AMDGPU::Swizzle; 6729 6730 SMLoc Loc; 6731 int64_t GroupSize; 6732 6733 if (!parseSwizzleOperand(GroupSize, 6734 2, 32, 6735 "group size must be in the interval [2,32]", 6736 Loc)) { 6737 return false; 6738 } 6739 if (!isPowerOf2_64(GroupSize)) { 6740 Error(Loc, "group size must be a power of two"); 6741 return false; 6742 } 6743 6744 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6745 return true; 6746 } 6747 6748 bool 6749 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6750 using namespace llvm::AMDGPU::Swizzle; 6751 6752 SMLoc Loc; 6753 int64_t GroupSize; 6754 6755 if (!parseSwizzleOperand(GroupSize, 6756 1, 16, 6757 "group size must be in the interval [1,16]", 6758 Loc)) { 6759 return false; 6760 } 6761 if (!isPowerOf2_64(GroupSize)) { 6762 Error(Loc, "group size must be a power of two"); 6763 return false; 6764 } 6765 6766 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6767 return true; 6768 } 6769 6770 bool 6771 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6772 using namespace llvm::AMDGPU::Swizzle; 6773 6774 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6775 return false; 6776 } 6777 6778 StringRef Ctl; 6779 SMLoc StrLoc = getLoc(); 6780 if (!parseString(Ctl)) { 6781 return false; 6782 } 6783 if (Ctl.size() != BITMASK_WIDTH) { 6784 Error(StrLoc, "expected a 5-character mask"); 6785 return false; 6786 } 6787 6788 unsigned AndMask = 0; 6789 unsigned OrMask = 0; 6790 unsigned XorMask = 0; 6791 6792 for (size_t i = 0; i < Ctl.size(); ++i) { 6793 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6794 switch(Ctl[i]) { 6795 default: 6796 Error(StrLoc, "invalid mask"); 6797 return false; 6798 case '0': 6799 break; 6800 case '1': 6801 OrMask |= Mask; 6802 break; 6803 case 'p': 6804 AndMask |= Mask; 6805 break; 6806 case 'i': 6807 AndMask |= Mask; 6808 XorMask |= Mask; 6809 break; 6810 } 6811 } 6812 6813 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6814 return true; 6815 } 6816 6817 bool 6818 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6819 6820 SMLoc OffsetLoc = getLoc(); 6821 6822 if (!parseExpr(Imm, "a swizzle macro")) { 6823 return false; 6824 } 6825 if (!isUInt<16>(Imm)) { 6826 Error(OffsetLoc, "expected a 16-bit offset"); 6827 return false; 6828 } 6829 return true; 6830 } 6831 6832 bool 6833 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6834 using namespace llvm::AMDGPU::Swizzle; 6835 6836 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6837 6838 SMLoc ModeLoc = getLoc(); 6839 bool Ok = false; 6840 6841 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6842 Ok = parseSwizzleQuadPerm(Imm); 6843 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6844 Ok = parseSwizzleBitmaskPerm(Imm); 6845 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6846 Ok = parseSwizzleBroadcast(Imm); 6847 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6848 Ok = parseSwizzleSwap(Imm); 6849 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6850 Ok = parseSwizzleReverse(Imm); 6851 } else { 6852 Error(ModeLoc, "expected a swizzle mode"); 6853 } 6854 6855 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6856 } 6857 6858 return false; 6859 } 6860 6861 OperandMatchResultTy 6862 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6863 SMLoc S = getLoc(); 6864 int64_t Imm = 0; 6865 6866 if (trySkipId("offset")) { 6867 6868 bool Ok = false; 6869 if (skipToken(AsmToken::Colon, "expected a colon")) { 6870 if (trySkipId("swizzle")) { 6871 Ok = parseSwizzleMacro(Imm); 6872 } else { 6873 Ok = parseSwizzleOffset(Imm); 6874 } 6875 } 6876 6877 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6878 6879 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6880 } else { 6881 // Swizzle "offset" operand is optional. 6882 // If it is omitted, try parsing other optional operands. 6883 return parseOptionalOpr(Operands); 6884 } 6885 } 6886 6887 bool 6888 AMDGPUOperand::isSwizzle() const { 6889 return isImmTy(ImmTySwizzle); 6890 } 6891 6892 //===----------------------------------------------------------------------===// 6893 // VGPR Index Mode 6894 //===----------------------------------------------------------------------===// 6895 6896 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6897 6898 using namespace llvm::AMDGPU::VGPRIndexMode; 6899 6900 if (trySkipToken(AsmToken::RParen)) { 6901 return OFF; 6902 } 6903 6904 int64_t Imm = 0; 6905 6906 while (true) { 6907 unsigned Mode = 0; 6908 SMLoc S = getLoc(); 6909 6910 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6911 if (trySkipId(IdSymbolic[ModeId])) { 6912 Mode = 1 << ModeId; 6913 break; 6914 } 6915 } 6916 6917 if (Mode == 0) { 6918 Error(S, (Imm == 0)? 
6919 "expected a VGPR index mode or a closing parenthesis" : 6920 "expected a VGPR index mode"); 6921 return UNDEF; 6922 } 6923 6924 if (Imm & Mode) { 6925 Error(S, "duplicate VGPR index mode"); 6926 return UNDEF; 6927 } 6928 Imm |= Mode; 6929 6930 if (trySkipToken(AsmToken::RParen)) 6931 break; 6932 if (!skipToken(AsmToken::Comma, 6933 "expected a comma or a closing parenthesis")) 6934 return UNDEF; 6935 } 6936 6937 return Imm; 6938 } 6939 6940 OperandMatchResultTy 6941 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6942 6943 using namespace llvm::AMDGPU::VGPRIndexMode; 6944 6945 int64_t Imm = 0; 6946 SMLoc S = getLoc(); 6947 6948 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6949 Imm = parseGPRIdxMacro(); 6950 if (Imm == UNDEF) 6951 return MatchOperand_ParseFail; 6952 } else { 6953 if (getParser().parseAbsoluteExpression(Imm)) 6954 return MatchOperand_ParseFail; 6955 if (Imm < 0 || !isUInt<4>(Imm)) { 6956 Error(S, "invalid immediate: only 4-bit values are legal"); 6957 return MatchOperand_ParseFail; 6958 } 6959 } 6960 6961 Operands.push_back( 6962 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6963 return MatchOperand_Success; 6964 } 6965 6966 bool AMDGPUOperand::isGPRIdxMode() const { 6967 return isImmTy(ImmTyGprIdxMode); 6968 } 6969 6970 //===----------------------------------------------------------------------===// 6971 // sopp branch targets 6972 //===----------------------------------------------------------------------===// 6973 6974 OperandMatchResultTy 6975 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6976 6977 // Make sure we are not parsing something 6978 // that looks like a label or an expression but is not. 6979 // This will improve error messages. 6980 if (isRegister() || isModifier()) 6981 return MatchOperand_NoMatch; 6982 6983 if (!parseExpr(Operands)) 6984 return MatchOperand_ParseFail; 6985 6986 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6987 assert(Opr.isImm() || Opr.isExpr()); 6988 SMLoc Loc = Opr.getStartLoc(); 6989 6990 // Currently we do not support arbitrary expressions as branch targets. 6991 // Only labels and absolute expressions are accepted. 
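// For example (illustrative), "s_branch loop_end" uses a label while
// "s_branch 4" uses an absolute expression; other expression kinds and
// out-of-range immediates are rejected below.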
6992 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6993 Error(Loc, "expected an absolute expression or a label"); 6994 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6995 Error(Loc, "expected a 16-bit signed jump offset"); 6996 } 6997 6998 return MatchOperand_Success; 6999 } 7000 7001 //===----------------------------------------------------------------------===// 7002 // Boolean holding registers 7003 //===----------------------------------------------------------------------===// 7004 7005 OperandMatchResultTy 7006 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7007 return parseReg(Operands); 7008 } 7009 7010 //===----------------------------------------------------------------------===// 7011 // mubuf 7012 //===----------------------------------------------------------------------===// 7013 7014 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7015 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7016 } 7017 7018 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7019 const OperandVector &Operands, 7020 bool IsAtomic, 7021 bool IsLds) { 7022 bool IsLdsOpcode = IsLds; 7023 bool HasLdsModifier = false; 7024 OptionalImmIndexMap OptionalIdx; 7025 unsigned FirstOperandIdx = 1; 7026 bool IsAtomicReturn = false; 7027 7028 if (IsAtomic) { 7029 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7030 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7031 if (!Op.isCPol()) 7032 continue; 7033 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7034 break; 7035 } 7036 7037 if (!IsAtomicReturn) { 7038 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7039 if (NewOpc != -1) 7040 Inst.setOpcode(NewOpc); 7041 } 7042 7043 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7044 SIInstrFlags::IsAtomicRet; 7045 } 7046 7047 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7048 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7049 7050 // Add the register arguments 7051 if (Op.isReg()) { 7052 Op.addRegOperands(Inst, 1); 7053 // Insert a tied src for atomic return dst. 7054 // This cannot be postponed as subsequent calls to 7055 // addImmOperands rely on correct number of MC operands. 7056 if (IsAtomicReturn && i == FirstOperandIdx) 7057 Op.addRegOperands(Inst, 1); 7058 continue; 7059 } 7060 7061 // Handle the case where soffset is an immediate 7062 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7063 Op.addImmOperands(Inst, 1); 7064 continue; 7065 } 7066 7067 HasLdsModifier |= Op.isLDS(); 7068 7069 // Handle tokens like 'offen' which are sometimes hard-coded into the 7070 // asm string. There are no MCInst operands for these. 7071 if (Op.isToken()) { 7072 continue; 7073 } 7074 assert(Op.isImm()); 7075 7076 // Handle optional arguments 7077 OptionalIdx[Op.getImmTy()] = i; 7078 } 7079 7080 // This is a workaround for an llvm quirk which may result in an 7081 // incorrect instruction selection. Lds and non-lds versions of 7082 // MUBUF instructions are identical except that lds versions 7083 // have mandatory 'lds' modifier. However this modifier follows 7084 // optional modifiers and llvm asm matcher regards this 'lds' 7085 // modifier as an optional one. As a result, an lds version 7086 // of opcode may be selected even if it has no 'lds' modifier. 7087 if (IsLdsOpcode && !HasLdsModifier) { 7088 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7089 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7090 Inst.setOpcode(NoLdsOpcode); 7091 IsLdsOpcode = false; 7092 } 7093 } 7094 7095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7096 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7097 7098 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7099 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7100 } 7101 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7102 } 7103 7104 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7105 OptionalImmIndexMap OptionalIdx; 7106 7107 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7108 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7109 7110 // Add the register arguments 7111 if (Op.isReg()) { 7112 Op.addRegOperands(Inst, 1); 7113 continue; 7114 } 7115 7116 // Handle the case where soffset is an immediate 7117 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7118 Op.addImmOperands(Inst, 1); 7119 continue; 7120 } 7121 7122 // Handle tokens like 'offen' which are sometimes hard-coded into the 7123 // asm string. There are no MCInst operands for these. 7124 if (Op.isToken()) { 7125 continue; 7126 } 7127 assert(Op.isImm()); 7128 7129 // Handle optional arguments 7130 OptionalIdx[Op.getImmTy()] = i; 7131 } 7132 7133 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7134 AMDGPUOperand::ImmTyOffset); 7135 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7136 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7137 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7138 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7139 } 7140 7141 //===----------------------------------------------------------------------===// 7142 // mimg 7143 //===----------------------------------------------------------------------===// 7144 7145 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7146 bool IsAtomic) { 7147 unsigned I = 1; 7148 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7149 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7150 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7151 } 7152 7153 if (IsAtomic) { 7154 // Add src, same as dst 7155 assert(Desc.getNumDefs() == 1); 7156 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7157 } 7158 7159 OptionalImmIndexMap OptionalIdx; 7160 7161 for (unsigned E = Operands.size(); I != E; ++I) { 7162 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7163 7164 // Add the register arguments 7165 if (Op.isReg()) { 7166 Op.addRegOperands(Inst, 1); 7167 } else if (Op.isImmModifier()) { 7168 OptionalIdx[Op.getImmTy()] = I; 7169 } else if (!Op.isToken()) { 7170 llvm_unreachable("unexpected operand type"); 7171 } 7172 } 7173 7174 bool IsGFX10Plus = isGFX10Plus(); 7175 7176 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7177 if (IsGFX10Plus) 7178 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7179 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7180 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7181 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7182 if (IsGFX10Plus) 7183 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7184 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7185 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7186 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7187 if (!IsGFX10Plus) 7188 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7189 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7190 } 7191 7192 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7193 cvtMIMG(Inst, Operands, true); 7194 } 7195 7196 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7197 OptionalImmIndexMap OptionalIdx; 7198 bool IsAtomicReturn = false; 7199 7200 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7201 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7202 if (!Op.isCPol()) 7203 continue; 7204 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7205 break; 7206 } 7207 7208 if (!IsAtomicReturn) { 7209 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7210 if (NewOpc != -1) 7211 Inst.setOpcode(NewOpc); 7212 } 7213 7214 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7215 SIInstrFlags::IsAtomicRet; 7216 7217 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7218 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7219 7220 // Add the register arguments 7221 if (Op.isReg()) { 7222 Op.addRegOperands(Inst, 1); 7223 if (IsAtomicReturn && i == 1) 7224 Op.addRegOperands(Inst, 1); 7225 continue; 7226 } 7227 7228 // Handle the case where soffset is an immediate 7229 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7230 Op.addImmOperands(Inst, 1); 7231 continue; 7232 } 7233 7234 // Handle tokens like 'offen' which are sometimes hard-coded into the 7235 // asm string. There are no MCInst operands for these. 7236 if (Op.isToken()) { 7237 continue; 7238 } 7239 assert(Op.isImm()); 7240 7241 // Handle optional arguments 7242 OptionalIdx[Op.getImmTy()] = i; 7243 } 7244 7245 if ((int)Inst.getNumOperands() <= 7246 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7247 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7249 } 7250 7251 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7252 const OperandVector &Operands) { 7253 for (unsigned I = 1; I < Operands.size(); ++I) { 7254 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7255 if (Operand.isReg()) 7256 Operand.addRegOperands(Inst, 1); 7257 } 7258 7259 Inst.addOperand(MCOperand::createImm(1)); // a16 7260 } 7261 7262 //===----------------------------------------------------------------------===// 7263 // smrd 7264 //===----------------------------------------------------------------------===// 7265 7266 bool AMDGPUOperand::isSMRDOffset8() const { 7267 return isImm() && isUInt<8>(getImm()); 7268 } 7269 7270 bool AMDGPUOperand::isSMEMOffset() const { 7271 return isImm(); // Offset range is checked later by validator. 7272 } 7273 7274 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7275 // 32-bit literals are only supported on CI and we only want to use them 7276 // when the offset is > 8-bits. 
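// In other words, the immediate must not fit into 8 bits but must fit into
// 32 bits.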
7277 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7278 } 7279 7280 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7281 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7282 } 7283 7284 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7285 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7286 } 7287 7288 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7289 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7290 } 7291 7292 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7293 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7294 } 7295 7296 //===----------------------------------------------------------------------===// 7297 // vop3 7298 //===----------------------------------------------------------------------===// 7299 7300 static bool ConvertOmodMul(int64_t &Mul) { 7301 if (Mul != 1 && Mul != 2 && Mul != 4) 7302 return false; 7303 7304 Mul >>= 1; 7305 return true; 7306 } 7307 7308 static bool ConvertOmodDiv(int64_t &Div) { 7309 if (Div == 1) { 7310 Div = 0; 7311 return true; 7312 } 7313 7314 if (Div == 2) { 7315 Div = 3; 7316 return true; 7317 } 7318 7319 return false; 7320 } 7321 7322 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7323 // This is intentional and ensures compatibility with sp3. 7324 // See bug 35397 for details. 7325 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7326 if (BoundCtrl == 0 || BoundCtrl == 1) { 7327 BoundCtrl = 1; 7328 return true; 7329 } 7330 return false; 7331 } 7332 7333 // Note: the order in this table matches the order of operands in AsmString. 7334 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7335 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7336 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7337 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7338 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7339 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7340 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7341 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7342 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7343 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7344 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7345 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7346 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7347 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7348 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7349 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7350 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7351 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7352 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7353 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7354 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7355 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7356 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7357 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7358 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7359 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7360 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7361 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7362 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7363 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7364 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7365   {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7366   {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7367   {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7368   {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7369   {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7370   {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7371   {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7372   {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7373   {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7374   {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7375   {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7376 };
7377
7378 void AMDGPUAsmParser::onBeginOfFile() {
7379   if (!getParser().getStreamer().getTargetStreamer() ||
7380       getSTI().getTargetTriple().getArch() == Triple::r600)
7381     return;
7382
7383   if (!getTargetStreamer().getTargetID())
7384     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7385
7386   if (isHsaAbiVersion3Or4(&getSTI()))
7387     getTargetStreamer().EmitDirectiveAMDGCNTarget();
7388 }
7389
7390 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7391
7392   OperandMatchResultTy res = parseOptionalOpr(Operands);
7393
7394   // This is a hack to enable hardcoded mandatory operands which follow
7395   // optional operands.
7396   //
7397   // The current design assumes that all operands after the first optional
7398   // operand are also optional. However, the implementation of some
7399   // instructions violates this rule (see e.g. flat/global atomic which have
7400   // hardcoded 'glc' operands).
7401   //
7402   // To alleviate this problem, we have to (implicitly) parse extra operands
7403   // to make sure the autogenerated parser of custom operands never hits
        // hardcoded mandatory operands.
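  //
  // For example (illustrative syntax), a flat/global atomic with a returned
  // value is written with a hardcoded trailing "glc" token, e.g.
  //   global_atomic_add v0, v[1:2], v3, off glc
  // and optional operands such as "offset:..." may appear before that token,
  // so the lookahead below keeps parsing optional operands instead of
  // stopping at the first one.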
7404 7405 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7406 if (res != MatchOperand_Success || 7407 isToken(AsmToken::EndOfStatement)) 7408 break; 7409 7410 trySkipToken(AsmToken::Comma); 7411 res = parseOptionalOpr(Operands); 7412 } 7413 7414 return res; 7415 } 7416 7417 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7418 OperandMatchResultTy res; 7419 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7420 // try to parse any optional operand here 7421 if (Op.IsBit) { 7422 res = parseNamedBit(Op.Name, Operands, Op.Type); 7423 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7424 res = parseOModOperand(Operands); 7425 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7426 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7427 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7428 res = parseSDWASel(Operands, Op.Name, Op.Type); 7429 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7430 res = parseSDWADstUnused(Operands); 7431 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7432 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7433 Op.Type == AMDGPUOperand::ImmTyNegLo || 7434 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7435 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7436 Op.ConvertResult); 7437 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7438 res = parseDim(Operands); 7439 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7440 res = parseCPol(Operands); 7441 } else { 7442 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7443 } 7444 if (res != MatchOperand_NoMatch) { 7445 return res; 7446 } 7447 } 7448 return MatchOperand_NoMatch; 7449 } 7450 7451 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7452 StringRef Name = getTokenStr(); 7453 if (Name == "mul") { 7454 return parseIntWithPrefix("mul", Operands, 7455 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7456 } 7457 7458 if (Name == "div") { 7459 return parseIntWithPrefix("div", Operands, 7460 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7461 } 7462 7463 return MatchOperand_NoMatch; 7464 } 7465 7466 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7467 cvtVOP3P(Inst, Operands); 7468 7469 int Opc = Inst.getOpcode(); 7470 7471 int SrcNum; 7472 const int Ops[] = { AMDGPU::OpName::src0, 7473 AMDGPU::OpName::src1, 7474 AMDGPU::OpName::src2 }; 7475 for (SrcNum = 0; 7476 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7477 ++SrcNum); 7478 assert(SrcNum > 0); 7479 7480 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7481 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7482 7483 if ((OpSel & (1 << SrcNum)) != 0) { 7484 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7485 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7486 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7487 } 7488 } 7489 7490 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7491 // 1. This operand is input modifiers 7492 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7493 // 2. This is not last operand 7494 && Desc.NumOperands > (OpNum + 1) 7495 // 3. Next operand is register class 7496 && Desc.OpInfo[OpNum + 1].RegClass != -1 7497 // 4. 
Next register is not tied to any other operand 7498 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7499 } 7500 7501 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7502 { 7503 OptionalImmIndexMap OptionalIdx; 7504 unsigned Opc = Inst.getOpcode(); 7505 7506 unsigned I = 1; 7507 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7508 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7509 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7510 } 7511 7512 for (unsigned E = Operands.size(); I != E; ++I) { 7513 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7514 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7515 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7516 } else if (Op.isInterpSlot() || 7517 Op.isInterpAttr() || 7518 Op.isAttrChan()) { 7519 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7520 } else if (Op.isImmModifier()) { 7521 OptionalIdx[Op.getImmTy()] = I; 7522 } else { 7523 llvm_unreachable("unhandled operand type"); 7524 } 7525 } 7526 7527 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7528 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7529 } 7530 7531 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7532 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7533 } 7534 7535 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7536 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7537 } 7538 } 7539 7540 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7541 OptionalImmIndexMap &OptionalIdx) { 7542 unsigned Opc = Inst.getOpcode(); 7543 7544 unsigned I = 1; 7545 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7546 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7547 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7548 } 7549 7550 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7551 // This instruction has src modifiers 7552 for (unsigned E = Operands.size(); I != E; ++I) { 7553 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7554 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7555 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7556 } else if (Op.isImmModifier()) { 7557 OptionalIdx[Op.getImmTy()] = I; 7558 } else if (Op.isRegOrImm()) { 7559 Op.addRegOrImmOperands(Inst, 1); 7560 } else { 7561 llvm_unreachable("unhandled operand type"); 7562 } 7563 } 7564 } else { 7565 // No src modifiers 7566 for (unsigned E = Operands.size(); I != E; ++I) { 7567 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7568 if (Op.isMod()) { 7569 OptionalIdx[Op.getImmTy()] = I; 7570 } else { 7571 Op.addRegOrImmOperands(Inst, 1); 7572 } 7573 } 7574 } 7575 7576 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7578 } 7579 7580 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7581 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7582 } 7583 7584 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7585 // it has src2 register operand that is tied to dst operand 7586 // we don't allow modifiers for this operand in assembler so src2_modifiers 7587 // should be 0. 
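  //
  // For example (illustrative), "v_mac_f32_e64 v0, v1, v2" is written without
  // an explicit src2; the code below inserts a zero src2_modifiers immediate
  // and a copy of the dst register as the tied src2 so the MCInst gets the
  // full operand list expected by the encoder.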
7588 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7589 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7590 Opc == AMDGPU::V_MAC_F32_e64_vi || 7591 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7592 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7593 Opc == AMDGPU::V_MAC_F16_e64_vi || 7594 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7595 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7596 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7597 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7598 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7599 auto it = Inst.begin(); 7600 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7601 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7602 ++it; 7603 // Copy the operand to ensure it's not invalidated when Inst grows. 7604 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7605 } 7606 } 7607 7608 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7609 OptionalImmIndexMap OptionalIdx; 7610 cvtVOP3(Inst, Operands, OptionalIdx); 7611 } 7612 7613 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7614 OptionalImmIndexMap &OptIdx) { 7615 const int Opc = Inst.getOpcode(); 7616 const MCInstrDesc &Desc = MII.get(Opc); 7617 7618 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7619 7620 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7621 assert(!IsPacked); 7622 Inst.addOperand(Inst.getOperand(0)); 7623 } 7624 7625 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7626 // instruction, and then figure out where to actually put the modifiers 7627 7628 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7629 if (OpSelIdx != -1) { 7630 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7631 } 7632 7633 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7634 if (OpSelHiIdx != -1) { 7635 int DefaultVal = IsPacked ? 
-1 : 0; 7636 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7637 DefaultVal); 7638 } 7639 7640 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7641 if (NegLoIdx != -1) { 7642 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7643 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7644 } 7645 7646 const int Ops[] = { AMDGPU::OpName::src0, 7647 AMDGPU::OpName::src1, 7648 AMDGPU::OpName::src2 }; 7649 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7650 AMDGPU::OpName::src1_modifiers, 7651 AMDGPU::OpName::src2_modifiers }; 7652 7653 unsigned OpSel = 0; 7654 unsigned OpSelHi = 0; 7655 unsigned NegLo = 0; 7656 unsigned NegHi = 0; 7657 7658 if (OpSelIdx != -1) 7659 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7660 7661 if (OpSelHiIdx != -1) 7662 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7663 7664 if (NegLoIdx != -1) { 7665 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7666 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7667 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7668 } 7669 7670 for (int J = 0; J < 3; ++J) { 7671 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7672 if (OpIdx == -1) 7673 break; 7674 7675 uint32_t ModVal = 0; 7676 7677 if ((OpSel & (1 << J)) != 0) 7678 ModVal |= SISrcMods::OP_SEL_0; 7679 7680 if ((OpSelHi & (1 << J)) != 0) 7681 ModVal |= SISrcMods::OP_SEL_1; 7682 7683 if ((NegLo & (1 << J)) != 0) 7684 ModVal |= SISrcMods::NEG; 7685 7686 if ((NegHi & (1 << J)) != 0) 7687 ModVal |= SISrcMods::NEG_HI; 7688 7689 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7690 7691 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7692 } 7693 } 7694 7695 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7696 OptionalImmIndexMap OptIdx; 7697 cvtVOP3(Inst, Operands, OptIdx); 7698 cvtVOP3P(Inst, Operands, OptIdx); 7699 } 7700 7701 //===----------------------------------------------------------------------===// 7702 // dpp 7703 //===----------------------------------------------------------------------===// 7704 7705 bool AMDGPUOperand::isDPP8() const { 7706 return isImmTy(ImmTyDPP8); 7707 } 7708 7709 bool AMDGPUOperand::isDPPCtrl() const { 7710 using namespace AMDGPU::DPP; 7711 7712 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7713 if (result) { 7714 int64_t Imm = getImm(); 7715 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7716 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7717 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7718 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7719 (Imm == DppCtrl::WAVE_SHL1) || 7720 (Imm == DppCtrl::WAVE_ROL1) || 7721 (Imm == DppCtrl::WAVE_SHR1) || 7722 (Imm == DppCtrl::WAVE_ROR1) || 7723 (Imm == DppCtrl::ROW_MIRROR) || 7724 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7725 (Imm == DppCtrl::BCAST15) || 7726 (Imm == DppCtrl::BCAST31) || 7727 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7728 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7729 } 7730 return false; 7731 } 7732 7733 //===----------------------------------------------------------------------===// 7734 // mAI 7735 //===----------------------------------------------------------------------===// 7736 7737 bool AMDGPUOperand::isBLGP() const { 7738 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7739 } 7740 7741 bool 
AMDGPUOperand::isCBSZ() const { 7742 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7743 } 7744 7745 bool AMDGPUOperand::isABID() const { 7746 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7747 } 7748 7749 bool AMDGPUOperand::isS16Imm() const { 7750 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7751 } 7752 7753 bool AMDGPUOperand::isU16Imm() const { 7754 return isImm() && isUInt<16>(getImm()); 7755 } 7756 7757 //===----------------------------------------------------------------------===// 7758 // dim 7759 //===----------------------------------------------------------------------===// 7760 7761 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7762 // We want to allow "dim:1D" etc., 7763 // but the initial 1 is tokenized as an integer. 7764 std::string Token; 7765 if (isToken(AsmToken::Integer)) { 7766 SMLoc Loc = getToken().getEndLoc(); 7767 Token = std::string(getTokenStr()); 7768 lex(); 7769 if (getLoc() != Loc) 7770 return false; 7771 } 7772 7773 StringRef Suffix; 7774 if (!parseId(Suffix)) 7775 return false; 7776 Token += Suffix; 7777 7778 StringRef DimId = Token; 7779 if (DimId.startswith("SQ_RSRC_IMG_")) 7780 DimId = DimId.drop_front(12); 7781 7782 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7783 if (!DimInfo) 7784 return false; 7785 7786 Encoding = DimInfo->Encoding; 7787 return true; 7788 } 7789 7790 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7791 if (!isGFX10Plus()) 7792 return MatchOperand_NoMatch; 7793 7794 SMLoc S = getLoc(); 7795 7796 if (!trySkipId("dim", AsmToken::Colon)) 7797 return MatchOperand_NoMatch; 7798 7799 unsigned Encoding; 7800 SMLoc Loc = getLoc(); 7801 if (!parseDimId(Encoding)) { 7802 Error(Loc, "invalid dim value"); 7803 return MatchOperand_ParseFail; 7804 } 7805 7806 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7807 AMDGPUOperand::ImmTyDim)); 7808 return MatchOperand_Success; 7809 } 7810 7811 //===----------------------------------------------------------------------===// 7812 // dpp 7813 //===----------------------------------------------------------------------===// 7814 7815 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7816 SMLoc S = getLoc(); 7817 7818 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7819 return MatchOperand_NoMatch; 7820 7821 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7822 7823 int64_t Sels[8]; 7824 7825 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7826 return MatchOperand_ParseFail; 7827 7828 for (size_t i = 0; i < 8; ++i) { 7829 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7830 return MatchOperand_ParseFail; 7831 7832 SMLoc Loc = getLoc(); 7833 if (getParser().parseAbsoluteExpression(Sels[i])) 7834 return MatchOperand_ParseFail; 7835 if (0 > Sels[i] || 7 < Sels[i]) { 7836 Error(Loc, "expected a 3-bit value"); 7837 return MatchOperand_ParseFail; 7838 } 7839 } 7840 7841 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7842 return MatchOperand_ParseFail; 7843 7844 unsigned DPP8 = 0; 7845 for (size_t i = 0; i < 8; ++i) 7846 DPP8 |= (Sels[i] << (i * 3)); 7847 7848 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7849 return MatchOperand_Success; 7850 } 7851 7852 bool 7853 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7854 const OperandVector &Operands) { 7855 if (Ctrl == "row_newbcast") 7856 return isGFX90A(); 7857 7858 if (Ctrl == "row_share" || 7859 Ctrl 
== "row_xmask") 7860 return isGFX10Plus(); 7861 7862 if (Ctrl == "wave_shl" || 7863 Ctrl == "wave_shr" || 7864 Ctrl == "wave_rol" || 7865 Ctrl == "wave_ror" || 7866 Ctrl == "row_bcast") 7867 return isVI() || isGFX9(); 7868 7869 return Ctrl == "row_mirror" || 7870 Ctrl == "row_half_mirror" || 7871 Ctrl == "quad_perm" || 7872 Ctrl == "row_shl" || 7873 Ctrl == "row_shr" || 7874 Ctrl == "row_ror"; 7875 } 7876 7877 int64_t 7878 AMDGPUAsmParser::parseDPPCtrlPerm() { 7879 // quad_perm:[%d,%d,%d,%d] 7880 7881 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7882 return -1; 7883 7884 int64_t Val = 0; 7885 for (int i = 0; i < 4; ++i) { 7886 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7887 return -1; 7888 7889 int64_t Temp; 7890 SMLoc Loc = getLoc(); 7891 if (getParser().parseAbsoluteExpression(Temp)) 7892 return -1; 7893 if (Temp < 0 || Temp > 3) { 7894 Error(Loc, "expected a 2-bit value"); 7895 return -1; 7896 } 7897 7898 Val += (Temp << i * 2); 7899 } 7900 7901 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7902 return -1; 7903 7904 return Val; 7905 } 7906 7907 int64_t 7908 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7909 using namespace AMDGPU::DPP; 7910 7911 // sel:%d 7912 7913 int64_t Val; 7914 SMLoc Loc = getLoc(); 7915 7916 if (getParser().parseAbsoluteExpression(Val)) 7917 return -1; 7918 7919 struct DppCtrlCheck { 7920 int64_t Ctrl; 7921 int Lo; 7922 int Hi; 7923 }; 7924 7925 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7926 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7927 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7928 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7929 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7930 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7931 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7932 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7933 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7934 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7935 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7936 .Default({-1, 0, 0}); 7937 7938 bool Valid; 7939 if (Check.Ctrl == -1) { 7940 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7941 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7942 } else { 7943 Valid = Check.Lo <= Val && Val <= Check.Hi; 7944 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7945 } 7946 7947 if (!Valid) { 7948 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7949 return -1; 7950 } 7951 7952 return Val; 7953 } 7954 7955 OperandMatchResultTy 7956 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7957 using namespace AMDGPU::DPP; 7958 7959 if (!isToken(AsmToken::Identifier) || 7960 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7961 return MatchOperand_NoMatch; 7962 7963 SMLoc S = getLoc(); 7964 int64_t Val = -1; 7965 StringRef Ctrl; 7966 7967 parseId(Ctrl); 7968 7969 if (Ctrl == "row_mirror") { 7970 Val = DppCtrl::ROW_MIRROR; 7971 } else if (Ctrl == "row_half_mirror") { 7972 Val = DppCtrl::ROW_HALF_MIRROR; 7973 } else { 7974 if (skipToken(AsmToken::Colon, "expected a colon")) { 7975 if (Ctrl == "quad_perm") { 7976 Val = parseDPPCtrlPerm(); 7977 } else { 7978 Val = parseDPPCtrlSel(Ctrl); 7979 } 7980 } 7981 } 7982 7983 if (Val == -1) 7984 return MatchOperand_ParseFail; 7985 7986 Operands.push_back( 7987 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7988 return MatchOperand_Success; 7989 } 7990 7991 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7992 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7993 } 7994 7995 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7996 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7997 } 7998 7999 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8000 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8001 } 8002 8003 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8004 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8005 } 8006 8007 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8008 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8009 } 8010 8011 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8012 OptionalImmIndexMap OptionalIdx; 8013 8014 unsigned Opc = Inst.getOpcode(); 8015 bool HasModifiers = 8016 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8017 unsigned I = 1; 8018 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8019 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8020 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8021 } 8022 8023 int Fi = 0; 8024 for (unsigned E = Operands.size(); I != E; ++I) { 8025 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8026 MCOI::TIED_TO); 8027 if (TiedTo != -1) { 8028 assert((unsigned)TiedTo < Inst.getNumOperands()); 8029 // handle tied old or src2 for MAC instructions 8030 Inst.addOperand(Inst.getOperand(TiedTo)); 8031 } 8032 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8033 // Add the register arguments 8034 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8035 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8036 // Skip it. 
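      // (For example, in "v_add_u32_dpp v1, vcc, v2, v3 quad_perm:[0,1,2,3]"
      //  the "vcc" operand comes from the fixed asm string and is skipped
      //  here rather than added to the MCInst. Syntax for illustration only.)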
8037 continue; 8038 } 8039 8040 if (IsDPP8) { 8041 if (Op.isDPP8()) { 8042 Op.addImmOperands(Inst, 1); 8043 } else if (HasModifiers && 8044 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8045 Op.addRegWithFPInputModsOperands(Inst, 2); 8046 } else if (Op.isFI()) { 8047 Fi = Op.getImm(); 8048 } else if (Op.isReg()) { 8049 Op.addRegOperands(Inst, 1); 8050 } else { 8051 llvm_unreachable("Invalid operand type"); 8052 } 8053 } else { 8054 if (HasModifiers && 8055 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8056 Op.addRegWithFPInputModsOperands(Inst, 2); 8057 } else if (Op.isReg()) { 8058 Op.addRegOperands(Inst, 1); 8059 } else if (Op.isDPPCtrl()) { 8060 Op.addImmOperands(Inst, 1); 8061 } else if (Op.isImm()) { 8062 // Handle optional arguments 8063 OptionalIdx[Op.getImmTy()] = I; 8064 } else { 8065 llvm_unreachable("Invalid operand type"); 8066 } 8067 } 8068 } 8069 8070 if (IsDPP8) { 8071 using namespace llvm::AMDGPU::DPP; 8072 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8073 } else { 8074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8077 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8078 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8079 } 8080 } 8081 } 8082 8083 //===----------------------------------------------------------------------===// 8084 // sdwa 8085 //===----------------------------------------------------------------------===// 8086 8087 OperandMatchResultTy 8088 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8089 AMDGPUOperand::ImmTy Type) { 8090 using namespace llvm::AMDGPU::SDWA; 8091 8092 SMLoc S = getLoc(); 8093 StringRef Value; 8094 OperandMatchResultTy res; 8095 8096 SMLoc StringLoc; 8097 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8098 if (res != MatchOperand_Success) { 8099 return res; 8100 } 8101 8102 int64_t Int; 8103 Int = StringSwitch<int64_t>(Value) 8104 .Case("BYTE_0", SdwaSel::BYTE_0) 8105 .Case("BYTE_1", SdwaSel::BYTE_1) 8106 .Case("BYTE_2", SdwaSel::BYTE_2) 8107 .Case("BYTE_3", SdwaSel::BYTE_3) 8108 .Case("WORD_0", SdwaSel::WORD_0) 8109 .Case("WORD_1", SdwaSel::WORD_1) 8110 .Case("DWORD", SdwaSel::DWORD) 8111 .Default(0xffffffff); 8112 8113 if (Int == 0xffffffff) { 8114 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8115 return MatchOperand_ParseFail; 8116 } 8117 8118 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8119 return MatchOperand_Success; 8120 } 8121 8122 OperandMatchResultTy 8123 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8124 using namespace llvm::AMDGPU::SDWA; 8125 8126 SMLoc S = getLoc(); 8127 StringRef Value; 8128 OperandMatchResultTy res; 8129 8130 SMLoc StringLoc; 8131 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8132 if (res != MatchOperand_Success) { 8133 return res; 8134 } 8135 8136 int64_t Int; 8137 Int = StringSwitch<int64_t>(Value) 8138 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8139 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8140 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8141 .Default(0xffffffff); 8142 8143 if (Int == 0xffffffff) { 8144 Error(StringLoc, "invalid dst_unused value"); 8145 return MatchOperand_ParseFail; 8146 } 8147 8148 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8149 return MatchOperand_Success; 8150 } 8151 8152 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8153 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8154 } 8155 8156 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8157 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8158 } 8159 8160 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8161 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8162 } 8163 8164 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8165 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8166 } 8167 8168 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8169 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8170 } 8171 8172 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8173 uint64_t BasicInstType, 8174 bool SkipDstVcc, 8175 bool SkipSrcVcc) { 8176 using namespace llvm::AMDGPU::SDWA; 8177 8178 OptionalImmIndexMap OptionalIdx; 8179 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8180 bool SkippedVcc = false; 8181 8182 unsigned I = 1; 8183 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8184 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8185 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8186 } 8187 8188 for (unsigned E = Operands.size(); I != E; ++I) { 8189 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8190 if (SkipVcc && !SkippedVcc && Op.isReg() && 8191 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8192 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8193 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8194 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8195 // Skip VCC only if we didn't skip it on previous iteration. 8196 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
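      // (With the result register already emitted, a dst-position vcc is
      //  therefore seen when the MCInst has exactly 1 operand, and a
      //  src-position vcc at 5 operands: 1 for dst plus 2 each for src0 and
      //  src1 with their modifiers.)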
8197       if (BasicInstType == SIInstrFlags::VOP2 &&
8198           ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8199            (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8200         SkippedVcc = true;
8201         continue;
8202       } else if (BasicInstType == SIInstrFlags::VOPC &&
8203                  Inst.getNumOperands() == 0) {
8204         SkippedVcc = true;
8205         continue;
8206       }
8207     }
8208     if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8209       Op.addRegOrImmWithInputModsOperands(Inst, 2);
8210     } else if (Op.isImm()) {
8211       // Handle optional arguments
8212       OptionalIdx[Op.getImmTy()] = I;
8213     } else {
8214       llvm_unreachable("Invalid operand type");
8215     }
8216     SkippedVcc = false;
8217   }
8218
8219   if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8220       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8221       Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8222     // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8223     switch (BasicInstType) {
8224     case SIInstrFlags::VOP1:
8225       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8226       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8227         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8228       }
8229       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8230       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8231       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8232       break;
8233
8234     case SIInstrFlags::VOP2:
8235       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8236       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8237         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8238       }
8239       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8240       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8241       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8242       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8243       break;
8244
8245     case SIInstrFlags::VOPC:
8246       if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8247         addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8248       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8249       addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8250       break;
8251
8252     default:
8253       llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8254     }
8255   }
8256
8257   // Special case v_mac_{f16, f32}:
8258   // it has a src2 register operand that is tied to the dst operand.
8259   if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8260       Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8261     auto it = Inst.begin();
8262     std::advance(
8263         it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8264     Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8265   }
8266 }
8267
8268 //===----------------------------------------------------------------------===//
8269 // mAI
8270 //===----------------------------------------------------------------------===//
8271
8272 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8273   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8274 }
8275
8276 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8277   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8278 }
8279
8280 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8281   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8282 }
8283
8284 /// Force static initialization.
8285 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8286   RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8287   RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8288 }
8289
8290 #define GET_REGISTER_MATCHER
8291 #define GET_MATCHER_IMPLEMENTATION
8292 #define GET_MNEMONIC_SPELL_CHECKER
8293 #define GET_MNEMONIC_CHECKER
8294 #include "AMDGPUGenAsmMatcher.inc"
8295
8296 // This function must be defined after the auto-generated include so that the
8297 // MatchClassKind enum is defined.
8298 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8299                                                      unsigned Kind) {
8300   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8301   // But MatchInstructionImpl() expects a token and fails to validate the
8302   // operand. This method checks whether we were given an immediate operand but
8303   // expected the corresponding token.
8304   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8305   switch (Kind) {
8306   case MCK_addr64:
8307     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8308   case MCK_gds:
8309     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8310   case MCK_lds:
8311     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8312   case MCK_idxen:
8313     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8314   case MCK_offen:
8315     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8316   case MCK_SSrcB32:
8317     // When operands have expression values, they will return true for isToken,
8318     // because it is not possible to distinguish between a token and an
8319     // expression at parse time. MatchInstructionImpl() will always try to
8320     // match an operand as a token when isToken returns true, and when the
8321     // name of the expression is not a valid token, the match will fail,
8322     // so we need to handle it here.
8323     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8324   case MCK_SSrcF32:
8325     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8326   case MCK_SoppBrTarget:
8327     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8328   case MCK_VReg32OrOff:
8329     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8330   case MCK_InterpSlot:
8331     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
8332   case MCK_Attr:
8333     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
8334   case MCK_AttrChan:
8335     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
8336   case MCK_ImmSMEMOffset:
8337     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
8338   case MCK_SReg_64:
8339   case MCK_SReg_64_XEXEC:
8340     // Null is defined as a 32-bit register but
8341     // it should also be enabled with 64-bit operands.
8342     // The following code enables it for SReg_64 operands
8343     // used as source and destination. Remaining source
8344     // operands are handled in isInlinableImm.
8345     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
8346   default:
8347     return Match_InvalidOperand;
8348   }
8349 }
8350
8351 //===----------------------------------------------------------------------===//
8352 // endpgm
8353 //===----------------------------------------------------------------------===//
8354
8355 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
8356   SMLoc S = getLoc();
8357   int64_t Imm = 0;
8358
8359   if (!parseExpr(Imm)) {
8360     // The operand is optional; if not present, default to 0.
8361     Imm = 0;
8362   }
8363
8364   if (!isUInt<16>(Imm)) {
8365     Error(S, "expected a 16-bit value");
8366     return MatchOperand_ParseFail;
8367   }
8368
8369   Operands.push_back(
8370       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
8371   return MatchOperand_Success;
8372 }
8373
8374 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
8375
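// Usage sketch (illustration only, not part of the parser): once the parsers
// are registered above, AMDGPU assembly is matched through the regular MC
// pipeline, e.g.
//   llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -show-encoding in.s
// which constructs this target asm parser via the registry and feeds each
// statement through the auto-generated matcher included above.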