1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/MC/TargetRegistry.h" 32 #include "llvm/Support/AMDGPUMetadata.h" 33 #include "llvm/Support/AMDHSAKernelDescriptor.h" 34 #include "llvm/Support/Casting.h" 35 #include "llvm/Support/MachineValueType.h" 36 #include "llvm/Support/TargetParser.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u; 88 return Operand; 89 } 90 91 int64_t getModifiersOperand() const { 92 assert(!(hasFPModifiers() && hasIntModifiers()) 93 && "fp and int modifiers should not be used simultaneously"); 94 if (hasFPModifiers()) { 95 return getFPModifiersOperand(); 96 } else if (hasIntModifiers()) { 97 return getIntModifiersOperand(); 98 } else { 99 return 0; 100 } 101 } 102 103 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 104 }; 105 106 enum ImmTy { 107 ImmTyNone, 108 ImmTyGDS, 109 ImmTyLDS, 110 ImmTyOffen, 111 ImmTyIdxen, 112 ImmTyAddr64, 113 ImmTyOffset, 114 ImmTyInstOffset, 115 ImmTyOffset0, 116 ImmTyOffset1, 117 ImmTyCPol, 118 ImmTySWZ, 119 ImmTyTFE, 120 ImmTyD16, 121 ImmTyClampSI, 122 ImmTyOModSI, 123 ImmTyDPP8, 124 ImmTyDppCtrl, 125 ImmTyDppRowMask, 126 ImmTyDppBankMask, 127 ImmTyDppBoundCtrl, 128 ImmTyDppFi, 129 ImmTySdwaDstSel, 130 ImmTySdwaSrc0Sel, 131 ImmTySdwaSrc1Sel, 132 ImmTySdwaDstUnused, 133 ImmTyDMask, 134 ImmTyDim, 135 ImmTyUNorm, 136 ImmTyDA, 137 ImmTyR128A16, 138 ImmTyA16, 139 ImmTyLWE, 140 ImmTyExpTgt, 141 ImmTyExpCompr, 142 ImmTyExpVM, 143 ImmTyFORMAT, 144 ImmTyHwreg, 145 ImmTyOff, 146 ImmTySendMsg, 147 ImmTyInterpSlot, 148 ImmTyInterpAttr, 149 ImmTyAttrChan, 150 ImmTyOpSel, 151 ImmTyOpSelHi, 152 ImmTyNegLo, 153 ImmTyNegHi, 154 ImmTySwizzle, 155 ImmTyGprIdxMode, 156 ImmTyHigh, 157 ImmTyBLGP, 158 ImmTyCBSZ, 159 ImmTyABID, 160 ImmTyEndpgm, 161 }; 162 163 enum ImmKindTy { 164 ImmKindTyNone, 165 ImmKindTyLiteral, 166 ImmKindTyConst, 167 }; 168 169 private: 170 struct TokOp { 171 const char *Data; 172 unsigned Length; 173 }; 174 175 struct ImmOp { 176 int64_t Val; 177 ImmTy Type; 178 bool IsFPImm; 179 mutable ImmKindTy Kind; 180 Modifiers Mods; 181 }; 182 183 struct RegOp { 184 unsigned RegNo; 185 Modifiers Mods; 186 }; 187 188 union { 189 TokOp Tok; 190 ImmOp Imm; 191 RegOp Reg; 192 const MCExpr *Expr; 193 }; 194 195 public: 196 bool isToken() const override { 197 if (Kind == Token) 198 return true; 199 200 // When parsing operands, we can't always tell if something was meant to be 201 // a token, like 'gds', or an expression that references a global variable. 202 // In this case, we assume the string is an expression, and if we need to 203 // interpret it as a token, then we treat the symbol name as the token.
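// For example, the trailing 'gds' in 'ds_write_b32 v0, v1 gds' may reach us as
// a reference to a symbol named 'gds'; reporting the symbol's name as the
// token still lets the matcher recognize it as the GDS modifier.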
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrInline(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type); 251 } 252 253 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 254 return isRegOrInline(RCID, type) || isLiteralImm(type); 255 } 256 257 bool isRegOrImmWithInt16InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 259 } 260 261 bool isRegOrImmWithInt32InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 263 } 264 265 bool isRegOrImmWithInt64InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 267 } 268 269 bool isRegOrImmWithFP16InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 271 } 272 273 bool isRegOrImmWithFP32InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 275 } 276 277 bool isRegOrImmWithFP64InputMods() const { 278 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 279 } 280 281 bool isVReg() const { 282 return isRegClass(AMDGPU::VGPR_32RegClassID) || 283 isRegClass(AMDGPU::VReg_64RegClassID) || 284 isRegClass(AMDGPU::VReg_96RegClassID) || 285 isRegClass(AMDGPU::VReg_128RegClassID) || 286 isRegClass(AMDGPU::VReg_160RegClassID) || 287 isRegClass(AMDGPU::VReg_192RegClassID) || 288 isRegClass(AMDGPU::VReg_256RegClassID) || 289 isRegClass(AMDGPU::VReg_512RegClassID) || 290 isRegClass(AMDGPU::VReg_1024RegClassID); 291 } 292 293 bool isVReg32() const { 294 return isRegClass(AMDGPU::VGPR_32RegClassID); 295 } 296 297 bool isVReg32OrOff() const { 298 return isOff() || isVReg32(); 299 } 300 301 bool isNull() const { 302 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 303 } 304 305 bool isVRegWithInputMods() const; 306 307 bool isSDWAOperand(MVT type) const; 308 bool isSDWAFP16Operand() const; 309 bool isSDWAFP32Operand() const; 310 bool isSDWAInt16Operand() const; 311 bool isSDWAInt32Operand() const; 312 313 bool isImmTy(ImmTy ImmT) const { 314 return isImm() && Imm.Type == ImmT; 315 } 316 317 bool isImmModifier() const { 318 return isImm() && Imm.Type != ImmTyNone; 319 } 320 321 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 322 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 323 bool isDMask() const { return isImmTy(ImmTyDMask); } 324 bool isDim() const { return isImmTy(ImmTyDim); } 325 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 326 bool isDA() const { return isImmTy(ImmTyDA); } 327 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 328 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 329 bool 
isLWE() const { return isImmTy(ImmTyLWE); } 330 bool isOff() const { return isImmTy(ImmTyOff); } 331 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 332 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 333 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 334 bool isOffen() const { return isImmTy(ImmTyOffen); } 335 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 336 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 337 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 338 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 339 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 340 341 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 342 bool isGDS() const { return isImmTy(ImmTyGDS); } 343 bool isLDS() const { return isImmTy(ImmTyLDS); } 344 bool isCPol() const { return isImmTy(ImmTyCPol); } 345 bool isSWZ() const { return isImmTy(ImmTySWZ); } 346 bool isTFE() const { return isImmTy(ImmTyTFE); } 347 bool isD16() const { return isImmTy(ImmTyD16); } 348 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 349 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 350 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 351 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 352 bool isFI() const { return isImmTy(ImmTyDppFi); } 353 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 354 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 355 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 356 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 357 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 358 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 359 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 360 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 361 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 362 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 363 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 364 bool isHigh() const { return isImmTy(ImmTyHigh); } 365 366 bool isMod() const { 367 return isClampSI() || isOModSI(); 368 } 369 370 bool isRegOrImm() const { 371 return isReg() || isImm(); 372 } 373 374 bool isRegClass(unsigned RCID) const; 375 376 bool isInlineValue() const; 377 378 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 379 return isRegOrInline(RCID, type) && !hasModifiers(); 380 } 381 382 bool isSCSrcB16() const { 383 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 384 } 385 386 bool isSCSrcV2B16() const { 387 return isSCSrcB16(); 388 } 389 390 bool isSCSrcB32() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 392 } 393 394 bool isSCSrcB64() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 396 } 397 398 bool isBoolReg() const; 399 400 bool isSCSrcF16() const { 401 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 402 } 403 404 bool isSCSrcV2F16() const { 405 return isSCSrcF16(); 406 } 407 408 bool isSCSrcF32() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 410 } 411 412 bool isSCSrcF64() const { 413 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 414 } 415 416 bool isSSrcB32() const { 417 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 418 } 419 420 bool isSSrcB16() const { 421 return isSCSrcB16() || 
isLiteralImm(MVT::i16); 422 } 423 424 bool isSSrcV2B16() const { 425 llvm_unreachable("cannot happen"); 426 return isSSrcB16(); 427 } 428 429 bool isSSrcB64() const { 430 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 431 // See isVSrc64(). 432 return isSCSrcB64() || isLiteralImm(MVT::i64); 433 } 434 435 bool isSSrcF32() const { 436 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 437 } 438 439 bool isSSrcF64() const { 440 return isSCSrcB64() || isLiteralImm(MVT::f64); 441 } 442 443 bool isSSrcF16() const { 444 return isSCSrcB16() || isLiteralImm(MVT::f16); 445 } 446 447 bool isSSrcV2F16() const { 448 llvm_unreachable("cannot happen"); 449 return isSSrcF16(); 450 } 451 452 bool isSSrcV2FP32() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF32(); 455 } 456 457 bool isSCSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSCSrcF32(); 460 } 461 462 bool isSSrcV2INT32() const { 463 llvm_unreachable("cannot happen"); 464 return isSSrcB32(); 465 } 466 467 bool isSCSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSCSrcB32(); 470 } 471 472 bool isSSrcOrLdsB32() const { 473 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 474 isLiteralImm(MVT::i32) || isExpr(); 475 } 476 477 bool isVCSrcB32() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 479 } 480 481 bool isVCSrcB64() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 483 } 484 485 bool isVCSrcB16() const { 486 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 487 } 488 489 bool isVCSrcV2B16() const { 490 return isVCSrcB16(); 491 } 492 493 bool isVCSrcF32() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 495 } 496 497 bool isVCSrcF64() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 499 } 500 501 bool isVCSrcF16() const { 502 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 503 } 504 505 bool isVCSrcV2F16() const { 506 return isVCSrcF16(); 507 } 508 509 bool isVSrcB32() const { 510 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 511 } 512 513 bool isVSrcB64() const { 514 return isVCSrcF64() || isLiteralImm(MVT::i64); 515 } 516 517 bool isVSrcB16() const { 518 return isVCSrcB16() || isLiteralImm(MVT::i16); 519 } 520 521 bool isVSrcV2B16() const { 522 return isVSrcB16() || isLiteralImm(MVT::v2i16); 523 } 524 525 bool isVCSrcV2FP32() const { 526 return isVCSrcF64(); 527 } 528 529 bool isVSrcV2FP32() const { 530 return isVSrcF64() || isLiteralImm(MVT::v2f32); 531 } 532 533 bool isVCSrcV2INT32() const { 534 return isVCSrcB64(); 535 } 536 537 bool isVSrcV2INT32() const { 538 return isVSrcB64() || isLiteralImm(MVT::v2i32); 539 } 540 541 bool isVSrcF32() const { 542 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 543 } 544 545 bool isVSrcF64() const { 546 return isVCSrcF64() || isLiteralImm(MVT::f64); 547 } 548 549 bool isVSrcF16() const { 550 return isVCSrcF16() || isLiteralImm(MVT::f16); 551 } 552 553 bool isVSrcV2F16() const { 554 return isVSrcF16() || isLiteralImm(MVT::v2f16); 555 } 556 557 bool isVISrcB32() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 559 } 560 561 bool isVISrcB16() const { 562 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 563 } 564 565 bool isVISrcV2B16() const { 566 return isVISrcB16(); 567 } 568 569 bool isVISrcF32() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 571 } 572 573 
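// Naming convention for the predicates in this group: 'VISrc' accepts a VGPR
// or an inline constant, 'AISrc' an AGPR or an inline constant; the
// _64/_128/... infix gives the register width in bits, and the B16/B32/F16/F64
// suffix is the type used for the inline-constant check (V2* are the packed
// forms).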
bool isVISrcF16() const { 574 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 575 } 576 577 bool isVISrcV2F16() const { 578 return isVISrcF16() || isVISrcB32(); 579 } 580 581 bool isVISrc_64B64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 583 } 584 585 bool isVISrc_64F64() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 587 } 588 589 bool isVISrc_64V2FP32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 591 } 592 593 bool isVISrc_64V2INT32() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 595 } 596 597 bool isVISrc_256B64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 599 } 600 601 bool isVISrc_256F64() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 603 } 604 605 bool isVISrc_128B16() const { 606 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 607 } 608 609 bool isVISrc_128V2B16() const { 610 return isVISrc_128B16(); 611 } 612 613 bool isVISrc_128B32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 615 } 616 617 bool isVISrc_128F32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2FP32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 623 } 624 625 bool isVISrc_256V2INT32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B32() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 631 } 632 633 bool isVISrc_512B16() const { 634 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 635 } 636 637 bool isVISrc_512V2B16() const { 638 return isVISrc_512B16(); 639 } 640 641 bool isVISrc_512F32() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 643 } 644 645 bool isVISrc_512F16() const { 646 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 647 } 648 649 bool isVISrc_512V2F16() const { 650 return isVISrc_512F16() || isVISrc_512B32(); 651 } 652 653 bool isVISrc_1024B32() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 655 } 656 657 bool isVISrc_1024B16() const { 658 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 659 } 660 661 bool isVISrc_1024V2B16() const { 662 return isVISrc_1024B16(); 663 } 664 665 bool isVISrc_1024F32() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 667 } 668 669 bool isVISrc_1024F16() const { 670 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 671 } 672 673 bool isVISrc_1024V2F16() const { 674 return isVISrc_1024F16() || isVISrc_1024B32(); 675 } 676 677 bool isAISrcB32() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 679 } 680 681 bool isAISrcB16() const { 682 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 683 } 684 685 bool isAISrcV2B16() const { 686 return isAISrcB16(); 687 } 688 689 bool isAISrcF32() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 691 } 692 693 bool isAISrcF16() const { 694 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 695 } 696 697 bool isAISrcV2F16() const { 698 return isAISrcF16() || isAISrcB32(); 699 } 700 701 bool isAISrc_64B64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 703 } 704 705 bool isAISrc_64F64() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 707 } 708 709 bool isAISrc_128B32() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 711 } 712 713 bool isAISrc_128B16() const { 714 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 715 } 716 717 bool isAISrc_128V2B16() const { 718 return isAISrc_128B16(); 719 } 720 721 bool isAISrc_128F32() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 723 } 724 725 bool isAISrc_128F16() const { 726 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 727 } 728 729 bool isAISrc_128V2F16() const { 730 return isAISrc_128F16() || isAISrc_128B32(); 731 } 732 733 bool isVISrc_128F16() const { 734 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 735 } 736 737 bool isVISrc_128V2F16() const { 738 return isVISrc_128F16() || isVISrc_128B32(); 739 } 740 741 bool isAISrc_256B64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 743 } 744 745 bool isAISrc_256F64() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 747 } 748 749 bool isAISrc_512B32() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 751 } 752 753 bool isAISrc_512B16() const { 754 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 755 } 756 757 bool isAISrc_512V2B16() const { 758 return isAISrc_512B16(); 759 } 760 761 bool isAISrc_512F32() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 763 } 764 765 bool isAISrc_512F16() const { 766 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 767 } 768 769 bool isAISrc_512V2F16() const { 770 return isAISrc_512F16() || isAISrc_512B32(); 771 } 772 773 bool isAISrc_1024B32() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 775 } 776 777 bool isAISrc_1024B16() const { 778 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 779 } 780 781 bool isAISrc_1024V2B16() const { 782 return isAISrc_1024B16(); 783 } 784 785 bool isAISrc_1024F32() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 787 } 788 789 bool isAISrc_1024F16() const { 790 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 791 } 792 793 bool isAISrc_1024V2F16() const { 794 return isAISrc_1024F16() || isAISrc_1024B32(); 795 } 796 797 bool isKImmFP32() const { 798 return isLiteralImm(MVT::f32); 799 } 800 801 bool isKImmFP16() const { 802 return isLiteralImm(MVT::f16); 803 } 804 805 bool isMem() const override { 806 return false; 807 } 808 809 bool isExpr() const { 810 return Kind == Expression; 811 } 812 813 bool isSoppBrTarget() const { 814 return isExpr() || isImm(); 815 } 816 817 bool isSWaitCnt() const; 818 bool isHwreg() const; 819 bool isSendMsg() const; 820 bool isSwizzle() const; 821 bool isSMRDOffset8() const; 822 bool isSMEMOffset() const; 823 bool isSMRDLiteralOffset() const; 824 bool isDPP8() const; 825 bool isDPPCtrl() const; 826 bool isBLGP() const; 827 bool isCBSZ() const; 828 bool isABID() const; 829 bool isGPRIdxMode() const; 830 bool isS16Imm() const; 831 bool isU16Imm() const; 832 bool isEndpgm() const; 833 834 StringRef getExpressionAsToken() const { 835 assert(isExpr()); 836 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 837 return S->getSymbol().getName(); 838 } 839 840 StringRef getToken() const { 841 assert(isToken()); 842 843 if (Kind == Expression) 844 return getExpressionAsToken(); 845 846 return StringRef(Tok.Data, Tok.Length); 
847 } 848 849 int64_t getImm() const { 850 assert(isImm()); 851 return Imm.Val; 852 } 853 854 void setImm(int64_t Val) { 855 assert(isImm()); 856 Imm.Val = Val; 857 } 858 859 ImmTy getImmTy() const { 860 assert(isImm()); 861 return Imm.Type; 862 } 863 864 unsigned getReg() const override { 865 assert(isRegKind()); 866 return Reg.RegNo; 867 } 868 869 SMLoc getStartLoc() const override { 870 return StartLoc; 871 } 872 873 SMLoc getEndLoc() const override { 874 return EndLoc; 875 } 876 877 SMRange getLocRange() const { 878 return SMRange(StartLoc, EndLoc); 879 } 880 881 Modifiers getModifiers() const { 882 assert(isRegKind() || isImmTy(ImmTyNone)); 883 return isRegKind() ? Reg.Mods : Imm.Mods; 884 } 885 886 void setModifiers(Modifiers Mods) { 887 assert(isRegKind() || isImmTy(ImmTyNone)); 888 if (isRegKind()) 889 Reg.Mods = Mods; 890 else 891 Imm.Mods = Mods; 892 } 893 894 bool hasModifiers() const { 895 return getModifiers().hasModifiers(); 896 } 897 898 bool hasFPModifiers() const { 899 return getModifiers().hasFPModifiers(); 900 } 901 902 bool hasIntModifiers() const { 903 return getModifiers().hasIntModifiers(); 904 } 905 906 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 907 908 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 909 910 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 911 912 template <unsigned Bitwidth> 913 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 914 915 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<16>(Inst, N); 917 } 918 919 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 920 addKImmFPOperands<32>(Inst, N); 921 } 922 923 void addRegOperands(MCInst &Inst, unsigned N) const; 924 925 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 926 addRegOperands(Inst, N); 927 } 928 929 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 930 if (isRegKind()) 931 addRegOperands(Inst, N); 932 else if (isExpr()) 933 Inst.addOperand(MCOperand::createExpr(Expr)); 934 else 935 addImmOperands(Inst, N); 936 } 937 938 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 939 Modifiers Mods = getModifiers(); 940 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 941 if (isRegKind()) { 942 addRegOperands(Inst, N); 943 } else { 944 addImmOperands(Inst, N, false); 945 } 946 } 947 948 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 949 assert(!hasIntModifiers()); 950 addRegOrImmWithInputModsOperands(Inst, N); 951 } 952 953 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 954 assert(!hasFPModifiers()); 955 addRegOrImmWithInputModsOperands(Inst, N); 956 } 957 958 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 959 Modifiers Mods = getModifiers(); 960 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 961 assert(isRegKind()); 962 addRegOperands(Inst, N); 963 } 964 965 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 966 assert(!hasIntModifiers()); 967 addRegWithInputModsOperands(Inst, N); 968 } 969 970 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 971 assert(!hasFPModifiers()); 972 addRegWithInputModsOperands(Inst, N); 973 } 974 975 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 976 if (isImm()) 977 addImmOperands(Inst, N); 978 else { 979 assert(isExpr()); 980 Inst.addOperand(MCOperand::createExpr(Expr)); 981 } 982 } 983 984 static void printImmTy(raw_ostream& OS, 
ImmTy Type) { 985 switch (Type) { 986 case ImmTyNone: OS << "None"; break; 987 case ImmTyGDS: OS << "GDS"; break; 988 case ImmTyLDS: OS << "LDS"; break; 989 case ImmTyOffen: OS << "Offen"; break; 990 case ImmTyIdxen: OS << "Idxen"; break; 991 case ImmTyAddr64: OS << "Addr64"; break; 992 case ImmTyOffset: OS << "Offset"; break; 993 case ImmTyInstOffset: OS << "InstOffset"; break; 994 case ImmTyOffset0: OS << "Offset0"; break; 995 case ImmTyOffset1: OS << "Offset1"; break; 996 case ImmTyCPol: OS << "CPol"; break; 997 case ImmTySWZ: OS << "SWZ"; break; 998 case ImmTyTFE: OS << "TFE"; break; 999 case ImmTyD16: OS << "D16"; break; 1000 case ImmTyFORMAT: OS << "FORMAT"; break; 1001 case ImmTyClampSI: OS << "ClampSI"; break; 1002 case ImmTyOModSI: OS << "OModSI"; break; 1003 case ImmTyDPP8: OS << "DPP8"; break; 1004 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1005 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1006 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1007 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1008 case ImmTyDppFi: OS << "FI"; break; 1009 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1010 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1011 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1012 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1013 case ImmTyDMask: OS << "DMask"; break; 1014 case ImmTyDim: OS << "Dim"; break; 1015 case ImmTyUNorm: OS << "UNorm"; break; 1016 case ImmTyDA: OS << "DA"; break; 1017 case ImmTyR128A16: OS << "R128A16"; break; 1018 case ImmTyA16: OS << "A16"; break; 1019 case ImmTyLWE: OS << "LWE"; break; 1020 case ImmTyOff: OS << "Off"; break; 1021 case ImmTyExpTgt: OS << "ExpTgt"; break; 1022 case ImmTyExpCompr: OS << "ExpCompr"; break; 1023 case ImmTyExpVM: OS << "ExpVM"; break; 1024 case ImmTyHwreg: OS << "Hwreg"; break; 1025 case ImmTySendMsg: OS << "SendMsg"; break; 1026 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1027 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1028 case ImmTyAttrChan: OS << "AttrChan"; break; 1029 case ImmTyOpSel: OS << "OpSel"; break; 1030 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1031 case ImmTyNegLo: OS << "NegLo"; break; 1032 case ImmTyNegHi: OS << "NegHi"; break; 1033 case ImmTySwizzle: OS << "Swizzle"; break; 1034 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1035 case ImmTyHigh: OS << "High"; break; 1036 case ImmTyBLGP: OS << "BLGP"; break; 1037 case ImmTyCBSZ: OS << "CBSZ"; break; 1038 case ImmTyABID: OS << "ABID"; break; 1039 case ImmTyEndpgm: OS << "Endpgm"; break; 1040 } 1041 } 1042 1043 void print(raw_ostream &OS) const override { 1044 switch (Kind) { 1045 case Register: 1046 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1047 break; 1048 case Immediate: 1049 OS << '<' << getImm(); 1050 if (getImmTy() != ImmTyNone) { 1051 OS << " type: "; printImmTy(OS, getImmTy()); 1052 } 1053 OS << " mods: " << Imm.Mods << '>'; 1054 break; 1055 case Token: 1056 OS << '\'' << getToken() << '\''; 1057 break; 1058 case Expression: 1059 OS << "<expr " << *Expr << '>'; 1060 break; 1061 } 1062 } 1063 1064 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1065 int64_t Val, SMLoc Loc, 1066 ImmTy Type = ImmTyNone, 1067 bool IsFPImm = false) { 1068 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1069 Op->Imm.Val = Val; 1070 Op->Imm.IsFPImm = IsFPImm; 1071 Op->Imm.Kind = ImmKindTyNone; 1072 Op->Imm.Type = Type; 1073 Op->Imm.Mods = Modifiers(); 1074 Op->StartLoc = Loc; 1075 Op->EndLoc = Loc; 1076 return Op; 1077 } 1078 1079 static AMDGPUOperand::Ptr 
CreateToken(const AMDGPUAsmParser *AsmParser, 1080 StringRef Str, SMLoc Loc, 1081 bool HasExplicitEncodingSize = true) { 1082 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1083 Res->Tok.Data = Str.data(); 1084 Res->Tok.Length = Str.size(); 1085 Res->StartLoc = Loc; 1086 Res->EndLoc = Loc; 1087 return Res; 1088 } 1089 1090 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1091 unsigned RegNo, SMLoc S, 1092 SMLoc E) { 1093 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1094 Op->Reg.RegNo = RegNo; 1095 Op->Reg.Mods = Modifiers(); 1096 Op->StartLoc = S; 1097 Op->EndLoc = E; 1098 return Op; 1099 } 1100 1101 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1102 const class MCExpr *Expr, SMLoc S) { 1103 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1104 Op->Expr = Expr; 1105 Op->StartLoc = S; 1106 Op->EndLoc = S; 1107 return Op; 1108 } 1109 }; 1110 1111 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1112 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1113 return OS; 1114 } 1115 1116 //===----------------------------------------------------------------------===// 1117 // AsmParser 1118 //===----------------------------------------------------------------------===// 1119 1120 // Holds info related to the current kernel, e.g. count of SGPRs used. 1121 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1122 // .amdgpu_hsa_kernel or at EOF. 1123 class KernelScopeInfo { 1124 int SgprIndexUnusedMin = -1; 1125 int VgprIndexUnusedMin = -1; 1126 MCContext *Ctx = nullptr; 1127 1128 void usesSgprAt(int i) { 1129 if (i >= SgprIndexUnusedMin) { 1130 SgprIndexUnusedMin = ++i; 1131 if (Ctx) { 1132 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1133 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1134 } 1135 } 1136 } 1137 1138 void usesVgprAt(int i) { 1139 if (i >= VgprIndexUnusedMin) { 1140 VgprIndexUnusedMin = ++i; 1141 if (Ctx) { 1142 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1143 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1144 } 1145 } 1146 } 1147 1148 public: 1149 KernelScopeInfo() = default; 1150 1151 void initialize(MCContext &Context) { 1152 Ctx = &Context; 1153 usesSgprAt(SgprIndexUnusedMin = -1); 1154 usesVgprAt(VgprIndexUnusedMin = -1); 1155 } 1156 1157 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1158 switch (RegKind) { 1159 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1160 case IS_AGPR: // fall through 1161 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1162 default: break; 1163 } 1164 } 1165 }; 1166 1167 class AMDGPUAsmParser : public MCTargetAsmParser { 1168 MCAsmParser &Parser; 1169 1170 // Number of extra operands parsed after the first optional operand. 1171 // This may be necessary to skip hardcoded mandatory operands. 
1172 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1173 1174 unsigned ForcedEncodingSize = 0; 1175 bool ForcedDPP = false; 1176 bool ForcedSDWA = false; 1177 KernelScopeInfo KernelScope; 1178 unsigned CPolSeen; 1179 1180 /// @name Auto-generated Match Functions 1181 /// { 1182 1183 #define GET_ASSEMBLER_HEADER 1184 #include "AMDGPUGenAsmMatcher.inc" 1185 1186 /// } 1187 1188 private: 1189 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1190 bool OutOfRangeError(SMRange Range); 1191 /// Calculate VGPR/SGPR blocks required for given target, reserved 1192 /// registers, and user-specified NextFreeXGPR values. 1193 /// 1194 /// \param Features [in] Target features, used for bug corrections. 1195 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1196 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1197 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1198 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1199 /// descriptor field, if valid. 1200 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1201 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1202 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1203 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1204 /// \param VGPRBlocks [out] Result VGPR block count. 1205 /// \param SGPRBlocks [out] Result SGPR block count. 1206 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1207 bool FlatScrUsed, bool XNACKUsed, 1208 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1209 SMRange VGPRRange, unsigned NextFreeSGPR, 1210 SMRange SGPRRange, unsigned &VGPRBlocks, 1211 unsigned &SGPRBlocks); 1212 bool ParseDirectiveAMDGCNTarget(); 1213 bool ParseDirectiveAMDHSAKernel(); 1214 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1215 bool ParseDirectiveHSACodeObjectVersion(); 1216 bool ParseDirectiveHSACodeObjectISA(); 1217 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1218 bool ParseDirectiveAMDKernelCodeT(); 1219 // TODO: Possibly make subtargetHasRegister const. 1220 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1221 bool ParseDirectiveAMDGPUHsaKernel(); 1222 1223 bool ParseDirectiveISAVersion(); 1224 bool ParseDirectiveHSAMetadata(); 1225 bool ParseDirectivePALMetadataBegin(); 1226 bool ParseDirectivePALMetadata(); 1227 bool ParseDirectiveAMDGPULDS(); 1228 1229 /// Common code to parse out a block of text (typically YAML) between start and 1230 /// end directives. 
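/// \param AssemblerDirectiveBegin [in] Directive that opened the block.
/// \param AssemblerDirectiveEnd [in] Directive that terminates collection.
/// \param CollectString [out] Receives the raw text found between the two
/// directives.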
bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1232 const char *AssemblerDirectiveEnd, 1233 std::string &CollectString); 1234 1235 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1236 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1237 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1238 unsigned &RegNum, unsigned &RegWidth, 1239 bool RestoreOnFailure = false); 1240 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1241 unsigned &RegNum, unsigned &RegWidth, 1242 SmallVectorImpl<AsmToken> &Tokens); 1243 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1244 unsigned &RegWidth, 1245 SmallVectorImpl<AsmToken> &Tokens); 1246 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1247 unsigned &RegWidth, 1248 SmallVectorImpl<AsmToken> &Tokens); 1249 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1250 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1251 bool ParseRegRange(unsigned& Num, unsigned& Width); 1252 unsigned getRegularReg(RegisterKind RegKind, 1253 unsigned RegNum, 1254 unsigned RegWidth, 1255 SMLoc Loc); 1256 1257 bool isRegister(); 1258 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1259 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1260 void initializeGprCountSymbol(RegisterKind RegKind); 1261 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1262 unsigned RegWidth); 1263 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1264 bool IsAtomic, bool IsLds = false); 1265 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1266 bool IsGdsHardcoded); 1267 1268 public: 1269 enum AMDGPUMatchResultTy { 1270 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1271 }; 1272 enum OperandMode { 1273 OperandMode_Default, 1274 OperandMode_NSA, 1275 }; 1276 1277 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1278 1279 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1280 const MCInstrInfo &MII, 1281 const MCTargetOptions &Options) 1282 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1283 MCAsmParserExtension::Initialize(Parser); 1284 1285 if (getFeatureBits().none()) { 1286 // Set default features. 1287 copySTI().ToggleFeature("southern-islands"); 1288 } 1289 1290 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1291 1292 { 1293 // TODO: make those pre-defined variables read-only. 1294 // Currently there is no suitable machinery in the core llvm-mc for this. 1295 // MCSymbol::isRedefinable is intended for another purpose, and 1296 // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
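// The symbols defined below expose the target ISA version to the assembly
// source; for example, code can guard a sequence with
// '.if .amdgcn.gfx_generation_number >= 9' (or use the older .option.* names
// on other ABIs).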
1297 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1298 MCContext &Ctx = getContext(); 1299 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1300 MCSymbol *Sym = 1301 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1303 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1304 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1305 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1307 } else { 1308 MCSymbol *Sym = 1309 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1313 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1314 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1315 } 1316 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1317 initializeGprCountSymbol(IS_VGPR); 1318 initializeGprCountSymbol(IS_SGPR); 1319 } else 1320 KernelScope.initialize(getContext()); 1321 } 1322 } 1323 1324 bool hasMIMG_R128() const { 1325 return AMDGPU::hasMIMG_R128(getSTI()); 1326 } 1327 1328 bool hasPackedD16() const { 1329 return AMDGPU::hasPackedD16(getSTI()); 1330 } 1331 1332 bool hasGFX10A16() const { 1333 return AMDGPU::hasGFX10A16(getSTI()); 1334 } 1335 1336 bool hasG16() const { return AMDGPU::hasG16(getSTI()); } 1337 1338 bool isSI() const { 1339 return AMDGPU::isSI(getSTI()); 1340 } 1341 1342 bool isCI() const { 1343 return AMDGPU::isCI(getSTI()); 1344 } 1345 1346 bool isVI() const { 1347 return AMDGPU::isVI(getSTI()); 1348 } 1349 1350 bool isGFX9() const { 1351 return AMDGPU::isGFX9(getSTI()); 1352 } 1353 1354 bool isGFX90A() const { 1355 return AMDGPU::isGFX90A(getSTI()); 1356 } 1357 1358 bool isGFX9Plus() const { 1359 return AMDGPU::isGFX9Plus(getSTI()); 1360 } 1361 1362 bool isGFX10() const { 1363 return AMDGPU::isGFX10(getSTI()); 1364 } 1365 1366 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1367 1368 bool isGFX10_BEncoding() const { 1369 return AMDGPU::isGFX10_BEncoding(getSTI()); 1370 } 1371 1372 bool hasInv2PiInlineImm() const { 1373 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1374 } 1375 1376 bool hasFlatOffsets() const { 1377 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1378 } 1379 1380 bool hasArchitectedFlatScratch() const { 1381 return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch]; 1382 } 1383 1384 bool hasSGPR102_SGPR103() const { 1385 return !isVI() && !isGFX9(); 1386 } 1387 1388 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1389 1390 bool hasIntClamp() const { 1391 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1392 } 1393 1394 AMDGPUTargetStreamer &getTargetStreamer() { 1395 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1396 return static_cast<AMDGPUTargetStreamer &>(TS); 1397 } 1398 1399 const MCRegisterInfo *getMRI() const { 1400 // We need this const_cast because for some reason getContext() is not const 1401 // in MCAsmParser. 
1402 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1403 } 1404 1405 const MCInstrInfo *getMII() const { 1406 return &MII; 1407 } 1408 1409 const FeatureBitset &getFeatureBits() const { 1410 return getSTI().getFeatureBits(); 1411 } 1412 1413 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1414 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1415 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1416 1417 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1418 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1419 bool isForcedDPP() const { return ForcedDPP; } 1420 bool isForcedSDWA() const { return ForcedSDWA; } 1421 ArrayRef<unsigned> getMatchedVariants() const; 1422 StringRef getMatchedVariantName() const; 1423 1424 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1425 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1426 bool RestoreOnFailure); 1427 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1428 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1429 SMLoc &EndLoc) override; 1430 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1431 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1432 unsigned Kind) override; 1433 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1434 OperandVector &Operands, MCStreamer &Out, 1435 uint64_t &ErrorInfo, 1436 bool MatchingInlineAsm) override; 1437 bool ParseDirective(AsmToken DirectiveID) override; 1438 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1439 OperandMode Mode = OperandMode_Default); 1440 StringRef parseMnemonicSuffix(StringRef Name); 1441 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1442 SMLoc NameLoc, OperandVector &Operands) override; 1443 //bool ProcessInstruction(MCInst &Inst); 1444 1445 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1446 1447 OperandMatchResultTy 1448 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1449 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1450 bool (*ConvertResult)(int64_t &) = nullptr); 1451 1452 OperandMatchResultTy 1453 parseOperandArrayWithPrefix(const char *Prefix, 1454 OperandVector &Operands, 1455 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1456 bool (*ConvertResult)(int64_t&) = nullptr); 1457 1458 OperandMatchResultTy 1459 parseNamedBit(StringRef Name, OperandVector &Operands, 1460 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1461 OperandMatchResultTy parseCPol(OperandVector &Operands); 1462 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1463 StringRef &Value, 1464 SMLoc &StringLoc); 1465 1466 bool isModifier(); 1467 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1468 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1469 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1470 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1471 bool parseSP3NegModifier(); 1472 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1473 OperandMatchResultTy parseReg(OperandVector &Operands); 1474 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1475 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1476 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1477 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1478 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1479 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1480 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1481 OperandMatchResultTy parseUfmt(int64_t &Format); 1482 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1483 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1484 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1485 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1486 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1487 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1488 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1489 1490 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1491 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1492 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1493 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1494 1495 bool parseCnt(int64_t &IntVal); 1496 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1497 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1498 1499 private: 1500 struct OperandInfoTy { 1501 SMLoc Loc; 1502 int64_t Id; 1503 bool IsSymbolic = false; 1504 bool IsDefined = false; 1505 1506 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1507 }; 1508 1509 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1510 bool validateSendMsg(const OperandInfoTy &Msg, 1511 const OperandInfoTy &Op, 1512 const OperandInfoTy &Stream); 1513 1514 bool parseHwregBody(OperandInfoTy &HwReg, 1515 OperandInfoTy &Offset, 1516 OperandInfoTy &Width); 1517 bool validateHwreg(const OperandInfoTy &HwReg, 1518 const OperandInfoTy &Offset, 1519 const OperandInfoTy &Width); 1520 1521 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1522 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1523 1524 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1525 const OperandVector &Operands) const; 1526 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1527 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1528 SMLoc getLitLoc(const OperandVector &Operands) const; 1529 SMLoc getConstLoc(const OperandVector &Operands) const; 1530 1531 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1532 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1533 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1534 bool validateSOPLiteral(const MCInst &Inst) const; 1535 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1536 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateIntClampSupported(const MCInst &Inst); 1538 bool validateMIMGAtomicDMask(const MCInst &Inst); 1539 bool validateMIMGGatherDMask(const MCInst &Inst); 1540 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1541 bool validateMIMGDataSize(const MCInst &Inst); 1542 bool validateMIMGAddrSize(const 
MCInst &Inst); 1543 bool validateMIMGD16(const MCInst &Inst); 1544 bool validateMIMGDim(const MCInst &Inst); 1545 bool validateMIMGMSAA(const MCInst &Inst); 1546 bool validateOpSel(const MCInst &Inst); 1547 bool validateDPP(const MCInst &Inst, const OperandVector &Operands); 1548 bool validateVccOperand(unsigned Reg) const; 1549 bool validateVOPLiteral(const MCInst &Inst, const OperandVector &Operands); 1550 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1551 bool validateAGPRLdSt(const MCInst &Inst) const; 1552 bool validateVGPRAlign(const MCInst &Inst) const; 1553 bool validateGWS(const MCInst &Inst, const OperandVector &Operands); 1554 bool validateDivScale(const MCInst &Inst); 1555 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1556 const SMLoc &IDLoc); 1557 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1558 unsigned getConstantBusLimit(unsigned Opcode) const; 1559 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1560 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1561 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1562 1563 bool isSupportedMnemo(StringRef Mnemo, 1564 const FeatureBitset &FBS); 1565 bool isSupportedMnemo(StringRef Mnemo, 1566 const FeatureBitset &FBS, 1567 ArrayRef<unsigned> Variants); 1568 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1569 1570 bool isId(const StringRef Id) const; 1571 bool isId(const AsmToken &Token, const StringRef Id) const; 1572 bool isToken(const AsmToken::TokenKind Kind) const; 1573 bool trySkipId(const StringRef Id); 1574 bool trySkipId(const StringRef Pref, const StringRef Id); 1575 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1576 bool trySkipToken(const AsmToken::TokenKind Kind); 1577 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1578 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1579 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1580 1581 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1582 AsmToken::TokenKind getTokenKind() const; 1583 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1584 bool parseExpr(OperandVector &Operands); 1585 StringRef getTokenStr() const; 1586 AsmToken peekToken(); 1587 AsmToken getToken() const; 1588 SMLoc getLoc() const; 1589 void lex(); 1590 1591 public: 1592 void onBeginOfFile() override; 1593 1594 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1595 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1596 1597 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1598 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1599 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1600 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1601 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1602 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1603 1604 bool parseSwizzleOperand(int64_t &Op, 1605 const unsigned MinVal, 1606 const unsigned MaxVal, 1607 const StringRef ErrMsg, 1608 SMLoc &Loc); 1609 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1610 const unsigned MinVal, 1611 const unsigned MaxVal, 1612 const StringRef ErrMsg); 1613 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1614 bool parseSwizzleOffset(int64_t &Imm); 1615 bool parseSwizzleMacro(int64_t &Imm); 1616 bool parseSwizzleQuadPerm(int64_t &Imm); 1617 bool parseSwizzleBitmaskPerm(int64_t 
&Imm); 1618 bool parseSwizzleBroadcast(int64_t &Imm); 1619 bool parseSwizzleSwap(int64_t &Imm); 1620 bool parseSwizzleReverse(int64_t &Imm); 1621 1622 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1623 int64_t parseGPRIdxMacro(); 1624 1625 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1626 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1627 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1628 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1629 1630 AMDGPUOperand::Ptr defaultCPol() const; 1631 1632 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1633 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1634 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1635 AMDGPUOperand::Ptr defaultFlatOffset() const; 1636 1637 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1638 1639 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1640 OptionalImmIndexMap &OptionalIdx); 1641 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1642 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1643 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1644 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 1645 OptionalImmIndexMap &OptionalIdx); 1646 1647 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1648 1649 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1650 bool IsAtomic = false); 1651 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1652 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1653 1654 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1655 1656 bool parseDimId(unsigned &Encoding); 1657 OperandMatchResultTy parseDim(OperandVector &Operands); 1658 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1659 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1660 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1661 int64_t parseDPPCtrlSel(StringRef Ctrl); 1662 int64_t parseDPPCtrlPerm(); 1663 AMDGPUOperand::Ptr defaultRowMask() const; 1664 AMDGPUOperand::Ptr defaultBankMask() const; 1665 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1666 AMDGPUOperand::Ptr defaultFI() const; 1667 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1668 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1669 1670 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1671 AMDGPUOperand::ImmTy Type); 1672 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1673 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1675 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1676 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1677 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1678 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1679 uint64_t BasicInstType, 1680 bool SkipDstVcc = false, 1681 bool SkipSrcVcc = false); 1682 1683 AMDGPUOperand::Ptr defaultBLGP() const; 1684 AMDGPUOperand::Ptr defaultCBSZ() const; 1685 AMDGPUOperand::Ptr defaultABID() const; 1686 1687 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1688 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1689 }; 1690 1691 struct OptionalOperand { 1692 
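// Describes one optional operand/modifier: its textual name, the immediate
// type it produces, whether it is a plain bit flag (e.g. 'gds'), and an
// optional hook used to convert or validate the parsed value.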
const char *Name; 1693 AMDGPUOperand::ImmTy Type; 1694 bool IsBit; 1695 bool (*ConvertResult)(int64_t&); 1696 }; 1697 1698 } // end anonymous namespace 1699 1700 // May be called with integer type with equivalent bitwidth. 1701 static const fltSemantics *getFltSemantics(unsigned Size) { 1702 switch (Size) { 1703 case 4: 1704 return &APFloat::IEEEsingle(); 1705 case 8: 1706 return &APFloat::IEEEdouble(); 1707 case 2: 1708 return &APFloat::IEEEhalf(); 1709 default: 1710 llvm_unreachable("unsupported fp type"); 1711 } 1712 } 1713 1714 static const fltSemantics *getFltSemantics(MVT VT) { 1715 return getFltSemantics(VT.getSizeInBits() / 8); 1716 } 1717 1718 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1719 switch (OperandType) { 1720 case AMDGPU::OPERAND_REG_IMM_INT32: 1721 case AMDGPU::OPERAND_REG_IMM_FP32: 1722 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 1723 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1724 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1725 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1726 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1727 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1728 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1729 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1730 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1731 case AMDGPU::OPERAND_KIMM32: 1732 return &APFloat::IEEEsingle(); 1733 case AMDGPU::OPERAND_REG_IMM_INT64: 1734 case AMDGPU::OPERAND_REG_IMM_FP64: 1735 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1736 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1737 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1738 return &APFloat::IEEEdouble(); 1739 case AMDGPU::OPERAND_REG_IMM_INT16: 1740 case AMDGPU::OPERAND_REG_IMM_FP16: 1741 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 1742 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1743 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1744 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1745 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1746 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1747 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1748 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1749 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1750 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1751 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1752 case AMDGPU::OPERAND_KIMM16: 1753 return &APFloat::IEEEhalf(); 1754 default: 1755 llvm_unreachable("unsupported fp type"); 1756 } 1757 } 1758 1759 //===----------------------------------------------------------------------===// 1760 // Operand 1761 //===----------------------------------------------------------------------===// 1762 1763 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1764 bool Lost; 1765 1766 // Convert literal to single precision 1767 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1768 APFloat::rmNearestTiesToEven, 1769 &Lost); 1770 // We allow precision lost but not overflow or underflow 1771 if (Status != APFloat::opOK && 1772 Lost && 1773 ((Status & APFloat::opOverflow) != 0 || 1774 (Status & APFloat::opUnderflow) != 0)) { 1775 return false; 1776 } 1777 1778 return true; 1779 } 1780 1781 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1782 return isUIntN(Size, Val) || isIntN(Size, Val); 1783 } 1784 1785 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1786 if (VT.getScalarType() == MVT::i16) { 1787 // FP immediate values are broken. 1788 return isInlinableIntLiteral(Val); 1789 } 1790 1791 // f16/v2f16 operands work correctly for all values. 
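// For reference, the inlinable 16-bit values are the small integers -16..64
// and the FP constants 0.0, +-0.5, +-1.0, +-2.0, +-4.0 and 1/(2*pi).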
1792 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
1793 }
1794
1795 bool AMDGPUOperand::isInlinableImm(MVT type) const {
1796
1797 // This is a hack to enable named inline values like
1798 // shared_base with both 32-bit and 64-bit operands.
1799 // Note that these values are defined as
1800 // 32-bit operands only.
1801 if (isInlineValue()) {
1802 return true;
1803 }
1804
1805 if (!isImmTy(ImmTyNone)) {
1806 // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
1807 return false;
1808 }
1809 // TODO: We should avoid using host float here. It would be better to
1810 // check the float bit values which is what a few other places do.
1811 // We've had bot failures before due to weird NaN support on mips hosts.
1812
1813 APInt Literal(64, Imm.Val);
1814
1815 if (Imm.IsFPImm) { // We got fp literal token
1816 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1817 return AMDGPU::isInlinableLiteral64(Imm.Val,
1818 AsmParser->hasInv2PiInlineImm());
1819 }
1820
1821 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
1822 if (!canLosslesslyConvertToFPType(FPLiteral, type))
1823 return false;
1824
1825 if (type.getScalarSizeInBits() == 16) {
1826 return isInlineableLiteralOp16(
1827 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1828 type, AsmParser->hasInv2PiInlineImm());
1829 }
1830
1831 // Check if single precision literal is inlinable
1832 return AMDGPU::isInlinableLiteral32(
1833 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
1834 AsmParser->hasInv2PiInlineImm());
1835 }
1836
1837 // We got int literal token.
1838 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
1839 return AMDGPU::isInlinableLiteral64(Imm.Val,
1840 AsmParser->hasInv2PiInlineImm());
1841 }
1842
1843 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
1844 return false;
1845 }
1846
1847 if (type.getScalarSizeInBits() == 16) {
1848 return isInlineableLiteralOp16(
1849 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
1850 type, AsmParser->hasInv2PiInlineImm());
1851 }
1852
1853 return AMDGPU::isInlinableLiteral32(
1854 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
1855 AsmParser->hasInv2PiInlineImm());
1856 }
1857
1858 bool AMDGPUOperand::isLiteralImm(MVT type) const {
1859 // Check that this immediate can be added as a literal
1860 if (!isImmTy(ImmTyNone)) {
1861 return false;
1862 }
1863
1864 if (!Imm.IsFPImm) {
1865 // We got int literal token.
1866
1867 if (type == MVT::f64 && hasFPModifiers()) {
1868 // Cannot apply fp modifiers to int literals while preserving the same
1869 // semantics for VOP1/2/C and VOP3 because of integer truncation. To avoid
1870 // ambiguity, disable these cases.
1871 return false;
1872 }
1873
1874 unsigned Size = type.getSizeInBits();
1875 if (Size == 64)
1876 Size = 32;
1877
1878 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
1879 // types.
1880 return isSafeTruncation(Imm.Val, Size);
1881 }
1882
1883 // We got fp literal token
1884 if (type == MVT::f64) { // Expected 64-bit fp operand
1885 // The low 32 bits of such a literal would be set to zeroes, but we accept these literals
1886 return true;
1887 }
1888
1889 if (type == MVT::i64) { // Expected 64-bit int operand
1890 // We don't allow fp literals in 64-bit integer instructions. It is
1891 // unclear how we should encode them.
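// E.g. an fp literal given to a 64-bit integer operand, as in
// "v_lshlrev_b64 v[0:1], v2, 0.5", is rejected here (illustrative example).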
1892 return false; 1893 } 1894 1895 // We allow fp literals with f16x2 operands assuming that the specified 1896 // literal goes into the lower half and the upper half is zero. We also 1897 // require that the literal may be losslesly converted to f16. 1898 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1899 (type == MVT::v2i16)? MVT::i16 : 1900 (type == MVT::v2f32)? MVT::f32 : type; 1901 1902 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1903 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1904 } 1905 1906 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1907 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1908 } 1909 1910 bool AMDGPUOperand::isVRegWithInputMods() const { 1911 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1912 // GFX90A allows DPP on 64-bit operands. 1913 (isRegClass(AMDGPU::VReg_64RegClassID) && 1914 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1915 } 1916 1917 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1918 if (AsmParser->isVI()) 1919 return isVReg32(); 1920 else if (AsmParser->isGFX9Plus()) 1921 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1922 else 1923 return false; 1924 } 1925 1926 bool AMDGPUOperand::isSDWAFP16Operand() const { 1927 return isSDWAOperand(MVT::f16); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAFP32Operand() const { 1931 return isSDWAOperand(MVT::f32); 1932 } 1933 1934 bool AMDGPUOperand::isSDWAInt16Operand() const { 1935 return isSDWAOperand(MVT::i16); 1936 } 1937 1938 bool AMDGPUOperand::isSDWAInt32Operand() const { 1939 return isSDWAOperand(MVT::i32); 1940 } 1941 1942 bool AMDGPUOperand::isBoolReg() const { 1943 auto FB = AsmParser->getFeatureBits(); 1944 return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1945 (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32())); 1946 } 1947 1948 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1949 { 1950 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1951 assert(Size == 2 || Size == 4 || Size == 8); 1952 1953 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1954 1955 if (Imm.Mods.Abs) { 1956 Val &= ~FpSignMask; 1957 } 1958 if (Imm.Mods.Neg) { 1959 Val ^= FpSignMask; 1960 } 1961 1962 return Val; 1963 } 1964 1965 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1966 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1967 Inst.getNumOperands())) { 1968 addLiteralImmOperand(Inst, Imm.Val, 1969 ApplyModifiers & 1970 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1971 } else { 1972 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1973 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1974 setImmKindNone(); 1975 } 1976 } 1977 1978 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1979 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1980 auto OpNum = Inst.getNumOperands(); 1981 // Check that this operand accepts literals 1982 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1983 1984 if (ApplyModifiers) { 1985 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1986 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1987 Val = applyInputFPModifiers(Val, Size); 1988 } 1989 1990 APInt Literal(64, Val); 1991 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1992 1993 if (Imm.IsFPImm) { // We got fp literal token 1994 switch (OpTy) { 1995 case AMDGPU::OPERAND_REG_IMM_INT64: 1996 case AMDGPU::OPERAND_REG_IMM_FP64: 1997 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1998 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1999 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2000 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 2001 AsmParser->hasInv2PiInlineImm())) { 2002 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 2003 setImmKindConst(); 2004 return; 2005 } 2006 2007 // Non-inlineable 2008 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2009 // For fp operands we check if low 32 bits are zeros 2010 if (Literal.getLoBits(32) != 0) { 2011 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2012 "Can't encode literal as exact 64-bit floating-point operand. " 2013 "Low 32-bits will be set to zero"); 2014 } 2015 2016 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2017 setImmKindLiteral(); 2018 return; 2019 } 2020 2021 // We don't allow fp literals in 64-bit integer instructions. It is 2022 // unclear how we should encode them. This case should be checked earlier 2023 // in predicate methods (isLiteralImm()) 2024 llvm_unreachable("fp literal in 64-bit integer instruction."); 2025 2026 case AMDGPU::OPERAND_REG_IMM_INT32: 2027 case AMDGPU::OPERAND_REG_IMM_FP32: 2028 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2029 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2030 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2031 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2032 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2033 case AMDGPU::OPERAND_REG_IMM_INT16: 2034 case AMDGPU::OPERAND_REG_IMM_FP16: 2035 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2036 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2037 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2038 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2039 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2040 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2041 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2042 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2043 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2044 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2045 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2046 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2047 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2048 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2049 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2050 case AMDGPU::OPERAND_KIMM32: 2051 case AMDGPU::OPERAND_KIMM16: { 2052 bool lost; 2053 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2054 // Convert literal to single precision 2055 FPLiteral.convert(*getOpFltSemantics(OpTy), 2056 APFloat::rmNearestTiesToEven, &lost); 2057 // We allow precision lost but not overflow or underflow. This should be 2058 // checked earlier in isLiteralImm() 2059 2060 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2061 Inst.addOperand(MCOperand::createImm(ImmVal)); 2062 setImmKindLiteral(); 2063 return; 2064 } 2065 default: 2066 llvm_unreachable("invalid operand size"); 2067 } 2068 2069 return; 2070 } 2071 2072 // We got int literal token. 2073 // Only sign extend inline immediates. 
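// E.g. for a 32-bit operand, -1 stays the sign-extended inline constant,
// while a non-inlinable value is truncated to its low 32 bits and emitted
// as a literal (descriptive note).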
2074 switch (OpTy) { 2075 case AMDGPU::OPERAND_REG_IMM_INT32: 2076 case AMDGPU::OPERAND_REG_IMM_FP32: 2077 case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED: 2078 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2079 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2080 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2081 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2082 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2083 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2084 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2085 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2086 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2087 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2088 if (isSafeTruncation(Val, 32) && 2089 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2090 AsmParser->hasInv2PiInlineImm())) { 2091 Inst.addOperand(MCOperand::createImm(Val)); 2092 setImmKindConst(); 2093 return; 2094 } 2095 2096 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2097 setImmKindLiteral(); 2098 return; 2099 2100 case AMDGPU::OPERAND_REG_IMM_INT64: 2101 case AMDGPU::OPERAND_REG_IMM_FP64: 2102 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2103 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2104 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2105 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2106 Inst.addOperand(MCOperand::createImm(Val)); 2107 setImmKindConst(); 2108 return; 2109 } 2110 2111 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2112 setImmKindLiteral(); 2113 return; 2114 2115 case AMDGPU::OPERAND_REG_IMM_INT16: 2116 case AMDGPU::OPERAND_REG_IMM_FP16: 2117 case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED: 2118 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2119 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2120 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2121 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2122 if (isSafeTruncation(Val, 16) && 2123 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2124 AsmParser->hasInv2PiInlineImm())) { 2125 Inst.addOperand(MCOperand::createImm(Val)); 2126 setImmKindConst(); 2127 return; 2128 } 2129 2130 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2131 setImmKindLiteral(); 2132 return; 2133 2134 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2135 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2136 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2137 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2138 assert(isSafeTruncation(Val, 16)); 2139 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2140 AsmParser->hasInv2PiInlineImm())); 2141 2142 Inst.addOperand(MCOperand::createImm(Val)); 2143 return; 2144 } 2145 case AMDGPU::OPERAND_KIMM32: 2146 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(32).getZExtValue())); 2147 setImmKindNone(); 2148 return; 2149 case AMDGPU::OPERAND_KIMM16: 2150 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(16).getZExtValue())); 2151 setImmKindNone(); 2152 return; 2153 default: 2154 llvm_unreachable("invalid operand size"); 2155 } 2156 } 2157 2158 template <unsigned Bitwidth> 2159 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2160 APInt Literal(64, Imm.Val); 2161 setImmKindNone(); 2162 2163 if (!Imm.IsFPImm) { 2164 // We got int literal token. 
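// Keep only the low Bitwidth bits of the integer literal, e.g. the low 32
// bits for a 32-bit KImm operand (descriptive note).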
2165 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2166 return; 2167 } 2168 2169 bool Lost; 2170 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2171 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2172 APFloat::rmNearestTiesToEven, &Lost); 2173 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2174 } 2175 2176 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2177 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2178 } 2179 2180 static bool isInlineValue(unsigned Reg) { 2181 switch (Reg) { 2182 case AMDGPU::SRC_SHARED_BASE: 2183 case AMDGPU::SRC_SHARED_LIMIT: 2184 case AMDGPU::SRC_PRIVATE_BASE: 2185 case AMDGPU::SRC_PRIVATE_LIMIT: 2186 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2187 return true; 2188 case AMDGPU::SRC_VCCZ: 2189 case AMDGPU::SRC_EXECZ: 2190 case AMDGPU::SRC_SCC: 2191 return true; 2192 case AMDGPU::SGPR_NULL: 2193 return true; 2194 default: 2195 return false; 2196 } 2197 } 2198 2199 bool AMDGPUOperand::isInlineValue() const { 2200 return isRegKind() && ::isInlineValue(getReg()); 2201 } 2202 2203 //===----------------------------------------------------------------------===// 2204 // AsmParser 2205 //===----------------------------------------------------------------------===// 2206 2207 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2208 if (Is == IS_VGPR) { 2209 switch (RegWidth) { 2210 default: return -1; 2211 case 1: return AMDGPU::VGPR_32RegClassID; 2212 case 2: return AMDGPU::VReg_64RegClassID; 2213 case 3: return AMDGPU::VReg_96RegClassID; 2214 case 4: return AMDGPU::VReg_128RegClassID; 2215 case 5: return AMDGPU::VReg_160RegClassID; 2216 case 6: return AMDGPU::VReg_192RegClassID; 2217 case 7: return AMDGPU::VReg_224RegClassID; 2218 case 8: return AMDGPU::VReg_256RegClassID; 2219 case 16: return AMDGPU::VReg_512RegClassID; 2220 case 32: return AMDGPU::VReg_1024RegClassID; 2221 } 2222 } else if (Is == IS_TTMP) { 2223 switch (RegWidth) { 2224 default: return -1; 2225 case 1: return AMDGPU::TTMP_32RegClassID; 2226 case 2: return AMDGPU::TTMP_64RegClassID; 2227 case 4: return AMDGPU::TTMP_128RegClassID; 2228 case 8: return AMDGPU::TTMP_256RegClassID; 2229 case 16: return AMDGPU::TTMP_512RegClassID; 2230 } 2231 } else if (Is == IS_SGPR) { 2232 switch (RegWidth) { 2233 default: return -1; 2234 case 1: return AMDGPU::SGPR_32RegClassID; 2235 case 2: return AMDGPU::SGPR_64RegClassID; 2236 case 3: return AMDGPU::SGPR_96RegClassID; 2237 case 4: return AMDGPU::SGPR_128RegClassID; 2238 case 5: return AMDGPU::SGPR_160RegClassID; 2239 case 6: return AMDGPU::SGPR_192RegClassID; 2240 case 7: return AMDGPU::SGPR_224RegClassID; 2241 case 8: return AMDGPU::SGPR_256RegClassID; 2242 case 16: return AMDGPU::SGPR_512RegClassID; 2243 } 2244 } else if (Is == IS_AGPR) { 2245 switch (RegWidth) { 2246 default: return -1; 2247 case 1: return AMDGPU::AGPR_32RegClassID; 2248 case 2: return AMDGPU::AReg_64RegClassID; 2249 case 3: return AMDGPU::AReg_96RegClassID; 2250 case 4: return AMDGPU::AReg_128RegClassID; 2251 case 5: return AMDGPU::AReg_160RegClassID; 2252 case 6: return AMDGPU::AReg_192RegClassID; 2253 case 7: return AMDGPU::AReg_224RegClassID; 2254 case 8: return AMDGPU::AReg_256RegClassID; 2255 case 16: return AMDGPU::AReg_512RegClassID; 2256 case 32: return AMDGPU::AReg_1024RegClassID; 2257 } 2258 } 2259 return -1; 2260 } 2261 2262 static unsigned getSpecialRegForName(StringRef RegName) { 2263 return StringSwitch<unsigned>(RegName) 2264 .Case("exec", 
AMDGPU::EXEC) 2265 .Case("vcc", AMDGPU::VCC) 2266 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2267 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2268 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2269 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2270 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2271 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2272 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2273 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2274 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2275 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2276 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2277 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2278 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2279 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2280 .Case("m0", AMDGPU::M0) 2281 .Case("vccz", AMDGPU::SRC_VCCZ) 2282 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2283 .Case("execz", AMDGPU::SRC_EXECZ) 2284 .Case("src_execz", AMDGPU::SRC_EXECZ) 2285 .Case("scc", AMDGPU::SRC_SCC) 2286 .Case("src_scc", AMDGPU::SRC_SCC) 2287 .Case("tba", AMDGPU::TBA) 2288 .Case("tma", AMDGPU::TMA) 2289 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2290 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2291 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2292 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2293 .Case("vcc_lo", AMDGPU::VCC_LO) 2294 .Case("vcc_hi", AMDGPU::VCC_HI) 2295 .Case("exec_lo", AMDGPU::EXEC_LO) 2296 .Case("exec_hi", AMDGPU::EXEC_HI) 2297 .Case("tma_lo", AMDGPU::TMA_LO) 2298 .Case("tma_hi", AMDGPU::TMA_HI) 2299 .Case("tba_lo", AMDGPU::TBA_LO) 2300 .Case("tba_hi", AMDGPU::TBA_HI) 2301 .Case("pc", AMDGPU::PC_REG) 2302 .Case("null", AMDGPU::SGPR_NULL) 2303 .Default(AMDGPU::NoRegister); 2304 } 2305 2306 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2307 SMLoc &EndLoc, bool RestoreOnFailure) { 2308 auto R = parseRegister(); 2309 if (!R) return true; 2310 assert(R->isReg()); 2311 RegNo = R->getReg(); 2312 StartLoc = R->getStartLoc(); 2313 EndLoc = R->getEndLoc(); 2314 return false; 2315 } 2316 2317 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2318 SMLoc &EndLoc) { 2319 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2320 } 2321 2322 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2323 SMLoc &StartLoc, 2324 SMLoc &EndLoc) { 2325 bool Result = 2326 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2327 bool PendingErrors = getParser().hasPendingError(); 2328 getParser().clearPendingErrors(); 2329 if (PendingErrors) 2330 return MatchOperand_ParseFail; 2331 if (Result) 2332 return MatchOperand_NoMatch; 2333 return MatchOperand_Success; 2334 } 2335 2336 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2337 RegisterKind RegKind, unsigned Reg1, 2338 SMLoc Loc) { 2339 switch (RegKind) { 2340 case IS_SPECIAL: 2341 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2342 Reg = AMDGPU::EXEC; 2343 RegWidth = 2; 2344 return true; 2345 } 2346 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2347 Reg = AMDGPU::FLAT_SCR; 2348 RegWidth = 2; 2349 return true; 2350 } 2351 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2352 Reg = AMDGPU::XNACK_MASK; 2353 RegWidth = 2; 2354 return true; 2355 } 2356 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2357 Reg = AMDGPU::VCC; 2358 RegWidth = 2; 2359 return true; 2360 } 2361 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2362 Reg = 
AMDGPU::TBA; 2363 RegWidth = 2; 2364 return true; 2365 } 2366 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2367 Reg = AMDGPU::TMA; 2368 RegWidth = 2; 2369 return true; 2370 } 2371 Error(Loc, "register does not fit in the list"); 2372 return false; 2373 case IS_VGPR: 2374 case IS_SGPR: 2375 case IS_AGPR: 2376 case IS_TTMP: 2377 if (Reg1 != Reg + RegWidth) { 2378 Error(Loc, "registers in a list must have consecutive indices"); 2379 return false; 2380 } 2381 RegWidth++; 2382 return true; 2383 default: 2384 llvm_unreachable("unexpected register kind"); 2385 } 2386 } 2387 2388 struct RegInfo { 2389 StringLiteral Name; 2390 RegisterKind Kind; 2391 }; 2392 2393 static constexpr RegInfo RegularRegisters[] = { 2394 {{"v"}, IS_VGPR}, 2395 {{"s"}, IS_SGPR}, 2396 {{"ttmp"}, IS_TTMP}, 2397 {{"acc"}, IS_AGPR}, 2398 {{"a"}, IS_AGPR}, 2399 }; 2400 2401 static bool isRegularReg(RegisterKind Kind) { 2402 return Kind == IS_VGPR || 2403 Kind == IS_SGPR || 2404 Kind == IS_TTMP || 2405 Kind == IS_AGPR; 2406 } 2407 2408 static const RegInfo* getRegularRegInfo(StringRef Str) { 2409 for (const RegInfo &Reg : RegularRegisters) 2410 if (Str.startswith(Reg.Name)) 2411 return &Reg; 2412 return nullptr; 2413 } 2414 2415 static bool getRegNum(StringRef Str, unsigned& Num) { 2416 return !Str.getAsInteger(10, Num); 2417 } 2418 2419 bool 2420 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2421 const AsmToken &NextToken) const { 2422 2423 // A list of consecutive registers: [s0,s1,s2,s3] 2424 if (Token.is(AsmToken::LBrac)) 2425 return true; 2426 2427 if (!Token.is(AsmToken::Identifier)) 2428 return false; 2429 2430 // A single register like s0 or a range of registers like s[0:1] 2431 2432 StringRef Str = Token.getString(); 2433 const RegInfo *Reg = getRegularRegInfo(Str); 2434 if (Reg) { 2435 StringRef RegName = Reg->Name; 2436 StringRef RegSuffix = Str.substr(RegName.size()); 2437 if (!RegSuffix.empty()) { 2438 unsigned Num; 2439 // A single register with an index: rXX 2440 if (getRegNum(RegSuffix, Num)) 2441 return true; 2442 } else { 2443 // A range of registers: r[XX:YY]. 2444 if (NextToken.is(AsmToken::LBrac)) 2445 return true; 2446 } 2447 } 2448 2449 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2450 } 2451 2452 bool 2453 AMDGPUAsmParser::isRegister() 2454 { 2455 return isRegister(getToken(), peekToken()); 2456 } 2457 2458 unsigned 2459 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2460 unsigned RegNum, 2461 unsigned RegWidth, 2462 SMLoc Loc) { 2463 2464 assert(isRegularReg(RegKind)); 2465 2466 unsigned AlignSize = 1; 2467 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2468 // SGPR and TTMP registers must be aligned. 2469 // Max required alignment is 4 dwords. 
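// E.g. s[2:3] is accepted, while s[1:2] fails the alignment check below
// (illustrative example).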
2470 AlignSize = std::min(RegWidth, 4u); 2471 } 2472 2473 if (RegNum % AlignSize != 0) { 2474 Error(Loc, "invalid register alignment"); 2475 return AMDGPU::NoRegister; 2476 } 2477 2478 unsigned RegIdx = RegNum / AlignSize; 2479 int RCID = getRegClass(RegKind, RegWidth); 2480 if (RCID == -1) { 2481 Error(Loc, "invalid or unsupported register size"); 2482 return AMDGPU::NoRegister; 2483 } 2484 2485 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2486 const MCRegisterClass RC = TRI->getRegClass(RCID); 2487 if (RegIdx >= RC.getNumRegs()) { 2488 Error(Loc, "register index is out of range"); 2489 return AMDGPU::NoRegister; 2490 } 2491 2492 return RC.getRegister(RegIdx); 2493 } 2494 2495 bool 2496 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2497 int64_t RegLo, RegHi; 2498 if (!skipToken(AsmToken::LBrac, "missing register index")) 2499 return false; 2500 2501 SMLoc FirstIdxLoc = getLoc(); 2502 SMLoc SecondIdxLoc; 2503 2504 if (!parseExpr(RegLo)) 2505 return false; 2506 2507 if (trySkipToken(AsmToken::Colon)) { 2508 SecondIdxLoc = getLoc(); 2509 if (!parseExpr(RegHi)) 2510 return false; 2511 } else { 2512 RegHi = RegLo; 2513 } 2514 2515 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2516 return false; 2517 2518 if (!isUInt<32>(RegLo)) { 2519 Error(FirstIdxLoc, "invalid register index"); 2520 return false; 2521 } 2522 2523 if (!isUInt<32>(RegHi)) { 2524 Error(SecondIdxLoc, "invalid register index"); 2525 return false; 2526 } 2527 2528 if (RegLo > RegHi) { 2529 Error(FirstIdxLoc, "first register index should not exceed second index"); 2530 return false; 2531 } 2532 2533 Num = static_cast<unsigned>(RegLo); 2534 Width = (RegHi - RegLo) + 1; 2535 return true; 2536 } 2537 2538 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2539 unsigned &RegNum, unsigned &RegWidth, 2540 SmallVectorImpl<AsmToken> &Tokens) { 2541 assert(isToken(AsmToken::Identifier)); 2542 unsigned Reg = getSpecialRegForName(getTokenStr()); 2543 if (Reg) { 2544 RegNum = 0; 2545 RegWidth = 1; 2546 RegKind = IS_SPECIAL; 2547 Tokens.push_back(getToken()); 2548 lex(); // skip register name 2549 } 2550 return Reg; 2551 } 2552 2553 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2554 unsigned &RegNum, unsigned &RegWidth, 2555 SmallVectorImpl<AsmToken> &Tokens) { 2556 assert(isToken(AsmToken::Identifier)); 2557 StringRef RegName = getTokenStr(); 2558 auto Loc = getLoc(); 2559 2560 const RegInfo *RI = getRegularRegInfo(RegName); 2561 if (!RI) { 2562 Error(Loc, "invalid register name"); 2563 return AMDGPU::NoRegister; 2564 } 2565 2566 Tokens.push_back(getToken()); 2567 lex(); // skip register name 2568 2569 RegKind = RI->Kind; 2570 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2571 if (!RegSuffix.empty()) { 2572 // Single 32-bit register: vXX. 2573 if (!getRegNum(RegSuffix, RegNum)) { 2574 Error(Loc, "invalid register index"); 2575 return AMDGPU::NoRegister; 2576 } 2577 RegWidth = 1; 2578 } else { 2579 // Range of registers: v[XX:YY]. ":YY" is optional. 
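// E.g. "v[4:7]" yields RegNum = 4 and RegWidth = 4; "v[4]" is also accepted
// and is equivalent to "v4" (illustrative example).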
2580 if (!ParseRegRange(RegNum, RegWidth)) 2581 return AMDGPU::NoRegister; 2582 } 2583 2584 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2585 } 2586 2587 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2588 unsigned &RegWidth, 2589 SmallVectorImpl<AsmToken> &Tokens) { 2590 unsigned Reg = AMDGPU::NoRegister; 2591 auto ListLoc = getLoc(); 2592 2593 if (!skipToken(AsmToken::LBrac, 2594 "expected a register or a list of registers")) { 2595 return AMDGPU::NoRegister; 2596 } 2597 2598 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2599 2600 auto Loc = getLoc(); 2601 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2602 return AMDGPU::NoRegister; 2603 if (RegWidth != 1) { 2604 Error(Loc, "expected a single 32-bit register"); 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 for (; trySkipToken(AsmToken::Comma); ) { 2609 RegisterKind NextRegKind; 2610 unsigned NextReg, NextRegNum, NextRegWidth; 2611 Loc = getLoc(); 2612 2613 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2614 NextRegNum, NextRegWidth, 2615 Tokens)) { 2616 return AMDGPU::NoRegister; 2617 } 2618 if (NextRegWidth != 1) { 2619 Error(Loc, "expected a single 32-bit register"); 2620 return AMDGPU::NoRegister; 2621 } 2622 if (NextRegKind != RegKind) { 2623 Error(Loc, "registers in a list must be of the same kind"); 2624 return AMDGPU::NoRegister; 2625 } 2626 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2627 return AMDGPU::NoRegister; 2628 } 2629 2630 if (!skipToken(AsmToken::RBrac, 2631 "expected a comma or a closing square bracket")) { 2632 return AMDGPU::NoRegister; 2633 } 2634 2635 if (isRegularReg(RegKind)) 2636 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2637 2638 return Reg; 2639 } 2640 2641 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2642 unsigned &RegNum, unsigned &RegWidth, 2643 SmallVectorImpl<AsmToken> &Tokens) { 2644 auto Loc = getLoc(); 2645 Reg = AMDGPU::NoRegister; 2646 2647 if (isToken(AsmToken::Identifier)) { 2648 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2649 if (Reg == AMDGPU::NoRegister) 2650 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2651 } else { 2652 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2653 } 2654 2655 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2656 if (Reg == AMDGPU::NoRegister) { 2657 assert(Parser.hasPendingError()); 2658 return false; 2659 } 2660 2661 if (!subtargetHasRegister(*TRI, Reg)) { 2662 if (Reg == AMDGPU::SGPR_NULL) { 2663 Error(Loc, "'null' operand is not supported on this GPU"); 2664 } else { 2665 Error(Loc, "register not available on this GPU"); 2666 } 2667 return false; 2668 } 2669 2670 return true; 2671 } 2672 2673 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2674 unsigned &RegNum, unsigned &RegWidth, 2675 bool RestoreOnFailure /*=false*/) { 2676 Reg = AMDGPU::NoRegister; 2677 2678 SmallVector<AsmToken, 1> Tokens; 2679 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2680 if (RestoreOnFailure) { 2681 while (!Tokens.empty()) { 2682 getLexer().UnLex(Tokens.pop_back_val()); 2683 } 2684 } 2685 return true; 2686 } 2687 return false; 2688 } 2689 2690 Optional<StringRef> 2691 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2692 switch (RegKind) { 2693 case IS_VGPR: 2694 return StringRef(".amdgcn.next_free_vgpr"); 2695 case IS_SGPR: 2696 return StringRef(".amdgcn.next_free_sgpr"); 2697 default: 2698 return None; 2699 } 2700 } 2701 2702 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2703 auto SymbolName = getGprCountSymbolName(RegKind); 2704 assert(SymbolName && "initializing invalid register kind"); 2705 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2706 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2707 } 2708 2709 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2710 unsigned DwordRegIndex, 2711 unsigned RegWidth) { 2712 // Symbols are only defined for GCN targets 2713 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2714 return true; 2715 2716 auto SymbolName = getGprCountSymbolName(RegKind); 2717 if (!SymbolName) 2718 return true; 2719 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2720 2721 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2722 int64_t OldCount; 2723 2724 if (!Sym->isVariable()) 2725 return !Error(getLoc(), 2726 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2727 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2728 return !Error( 2729 getLoc(), 2730 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2731 2732 if (OldCount <= NewMax) 2733 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2734 2735 return true; 2736 } 2737 2738 std::unique_ptr<AMDGPUOperand> 2739 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2740 const auto &Tok = getToken(); 2741 SMLoc StartLoc = Tok.getLoc(); 2742 SMLoc EndLoc = Tok.getEndLoc(); 2743 RegisterKind RegKind; 2744 unsigned Reg, RegNum, RegWidth; 2745 2746 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2747 return nullptr; 2748 } 2749 if (isHsaAbiVersion3Or4(&getSTI())) { 2750 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2751 return nullptr; 2752 } else 2753 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2754 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2755 } 2756 2757 OperandMatchResultTy 2758 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2759 // TODO: add syntactic sugar for 1/(2*PI) 2760 2761 assert(!isRegister()); 2762 assert(!isModifier()); 2763 2764 const auto& Tok = getToken(); 2765 const auto& NextTok = peekToken(); 2766 bool IsReal = Tok.is(AsmToken::Real); 2767 SMLoc S = getLoc(); 2768 bool Negate = false; 2769 2770 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2771 lex(); 2772 IsReal = true; 2773 Negate = true; 2774 } 2775 2776 if (IsReal) { 2777 // Floating-point expressions are not supported. 2778 // Can only allow floating-point literals with an 2779 // optional sign. 2780 2781 StringRef Num = getTokenStr(); 2782 lex(); 2783 2784 APFloat RealVal(APFloat::IEEEdouble()); 2785 auto roundMode = APFloat::rmNearestTiesToEven; 2786 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2787 return MatchOperand_ParseFail; 2788 } 2789 if (Negate) 2790 RealVal.changeSign(); 2791 2792 Operands.push_back( 2793 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2794 AMDGPUOperand::ImmTyNone, true)); 2795 2796 return MatchOperand_Success; 2797 2798 } else { 2799 int64_t IntVal; 2800 const MCExpr *Expr; 2801 SMLoc S = getLoc(); 2802 2803 if (HasSP3AbsModifier) { 2804 // This is a workaround for handling expressions 2805 // as arguments of SP3 'abs' modifier, for example: 2806 // |1.0| 2807 // |-1| 2808 // |1+x| 2809 // This syntax is not compatible with syntax of standard 2810 // MC expressions (due to the trailing '|'). 
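// parsePrimaryExpr() stops before the trailing '|', whereas parseExpression()
// would try to fold '|' into the expression as a binary operator
// (descriptive note).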
2811 SMLoc EndLoc; 2812 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2813 return MatchOperand_ParseFail; 2814 } else { 2815 if (Parser.parseExpression(Expr)) 2816 return MatchOperand_ParseFail; 2817 } 2818 2819 if (Expr->evaluateAsAbsolute(IntVal)) { 2820 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2821 } else { 2822 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2823 } 2824 2825 return MatchOperand_Success; 2826 } 2827 2828 return MatchOperand_NoMatch; 2829 } 2830 2831 OperandMatchResultTy 2832 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2833 if (!isRegister()) 2834 return MatchOperand_NoMatch; 2835 2836 if (auto R = parseRegister()) { 2837 assert(R->isReg()); 2838 Operands.push_back(std::move(R)); 2839 return MatchOperand_Success; 2840 } 2841 return MatchOperand_ParseFail; 2842 } 2843 2844 OperandMatchResultTy 2845 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2846 auto res = parseReg(Operands); 2847 if (res != MatchOperand_NoMatch) { 2848 return res; 2849 } else if (isModifier()) { 2850 return MatchOperand_NoMatch; 2851 } else { 2852 return parseImm(Operands, HasSP3AbsMod); 2853 } 2854 } 2855 2856 bool 2857 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2858 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2859 const auto &str = Token.getString(); 2860 return str == "abs" || str == "neg" || str == "sext"; 2861 } 2862 return false; 2863 } 2864 2865 bool 2866 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2867 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2868 } 2869 2870 bool 2871 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2872 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2873 } 2874 2875 bool 2876 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2877 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2878 } 2879 2880 // Check if this is an operand modifier or an opcode modifier 2881 // which may look like an expression but it is not. We should 2882 // avoid parsing these modifiers as expressions. Currently 2883 // recognized sequences are: 2884 // |...| 2885 // abs(...) 2886 // neg(...) 2887 // sext(...) 2888 // -reg 2889 // -|...| 2890 // -abs(...) 2891 // name:... 2892 // Note that simple opcode modifiers like 'gds' may be parsed as 2893 // expressions; this is a special case. See getExpressionAsToken. 2894 // 2895 bool 2896 AMDGPUAsmParser::isModifier() { 2897 2898 AsmToken Tok = getToken(); 2899 AsmToken NextToken[2]; 2900 peekTokens(NextToken); 2901 2902 return isOperandModifier(Tok, NextToken[0]) || 2903 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2904 isOpcodeModifierWithVal(Tok, NextToken[0]); 2905 } 2906 2907 // Check if the current token is an SP3 'neg' modifier. 2908 // Currently this modifier is allowed in the following context: 2909 // 2910 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2911 // 2. Before an 'abs' modifier: -abs(...) 2912 // 3. Before an SP3 'abs' modifier: -|...| 2913 // 2914 // In all other cases "-" is handled as a part 2915 // of an expression that follows the sign. 
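// E.g. in "v_ceil_f32 v0, -x" (where x is a symbol rather than a register)
// the "-" is left to the expression parser instead of being consumed as a
// NEG modifier (illustrative example).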
2916
2917 // Note: When "-" is followed by an integer literal,
2918 // this is interpreted as integer negation rather
2919 // than a floating-point NEG modifier applied to N.
2920 // Besides being counter-intuitive, such use of the floating-point
2921 // NEG modifier would have resulted in different meanings
2922 // of integer literals used with VOP1/2/C and VOP3,
2923 // for example:
2924 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2925 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2926 // Negative fp literals with a preceding "-" are
2927 // handled likewise for uniformity.
2928 //
2929 bool
2930 AMDGPUAsmParser::parseSP3NegModifier() {
2931
2932 AsmToken NextToken[2];
2933 peekTokens(NextToken);
2934
2935 if (isToken(AsmToken::Minus) &&
2936 (isRegister(NextToken[0], NextToken[1]) ||
2937 NextToken[0].is(AsmToken::Pipe) ||
2938 isId(NextToken[0], "abs"))) {
2939 lex();
2940 return true;
2941 }
2942
2943 return false;
2944 }
2945
2946 OperandMatchResultTy
2947 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2948 bool AllowImm) {
2949 bool Neg, SP3Neg;
2950 bool Abs, SP3Abs;
2951 SMLoc Loc;
2952
2953 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2954 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2955 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2956 return MatchOperand_ParseFail;
2957 }
2958
2959 SP3Neg = parseSP3NegModifier();
2960
2961 Loc = getLoc();
2962 Neg = trySkipId("neg");
2963 if (Neg && SP3Neg) {
2964 Error(Loc, "expected register or immediate");
2965 return MatchOperand_ParseFail;
2966 }
2967 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2968 return MatchOperand_ParseFail;
2969
2970 Abs = trySkipId("abs");
2971 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2972 return MatchOperand_ParseFail;
2973
2974 Loc = getLoc();
2975 SP3Abs = trySkipToken(AsmToken::Pipe);
2976 if (Abs && SP3Abs) {
2977 Error(Loc, "expected register or immediate");
2978 return MatchOperand_ParseFail;
2979 }
2980
2981 OperandMatchResultTy Res;
2982 if (AllowImm) {
2983 Res = parseRegOrImm(Operands, SP3Abs);
2984 } else {
2985 Res = parseReg(Operands);
2986 }
2987 if (Res != MatchOperand_Success) {
2988 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2989 } 2990 2991 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2992 return MatchOperand_ParseFail; 2993 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2994 return MatchOperand_ParseFail; 2995 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2996 return MatchOperand_ParseFail; 2997 2998 AMDGPUOperand::Modifiers Mods; 2999 Mods.Abs = Abs || SP3Abs; 3000 Mods.Neg = Neg || SP3Neg; 3001 3002 if (Mods.hasFPModifiers()) { 3003 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3004 if (Op.isExpr()) { 3005 Error(Op.getStartLoc(), "expected an absolute expression"); 3006 return MatchOperand_ParseFail; 3007 } 3008 Op.setModifiers(Mods); 3009 } 3010 return MatchOperand_Success; 3011 } 3012 3013 OperandMatchResultTy 3014 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 3015 bool AllowImm) { 3016 bool Sext = trySkipId("sext"); 3017 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 3018 return MatchOperand_ParseFail; 3019 3020 OperandMatchResultTy Res; 3021 if (AllowImm) { 3022 Res = parseRegOrImm(Operands); 3023 } else { 3024 Res = parseReg(Operands); 3025 } 3026 if (Res != MatchOperand_Success) { 3027 return Sext? MatchOperand_ParseFail : Res; 3028 } 3029 3030 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3031 return MatchOperand_ParseFail; 3032 3033 AMDGPUOperand::Modifiers Mods; 3034 Mods.Sext = Sext; 3035 3036 if (Mods.hasIntModifiers()) { 3037 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3038 if (Op.isExpr()) { 3039 Error(Op.getStartLoc(), "expected an absolute expression"); 3040 return MatchOperand_ParseFail; 3041 } 3042 Op.setModifiers(Mods); 3043 } 3044 3045 return MatchOperand_Success; 3046 } 3047 3048 OperandMatchResultTy 3049 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3050 return parseRegOrImmWithFPInputMods(Operands, false); 3051 } 3052 3053 OperandMatchResultTy 3054 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3055 return parseRegOrImmWithIntInputMods(Operands, false); 3056 } 3057 3058 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3059 auto Loc = getLoc(); 3060 if (trySkipId("off")) { 3061 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3062 AMDGPUOperand::ImmTyOff, false)); 3063 return MatchOperand_Success; 3064 } 3065 3066 if (!isRegister()) 3067 return MatchOperand_NoMatch; 3068 3069 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3070 if (Reg) { 3071 Operands.push_back(std::move(Reg)); 3072 return MatchOperand_Success; 3073 } 3074 3075 return MatchOperand_ParseFail; 3076 3077 } 3078 3079 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3080 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3081 3082 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3083 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3084 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3085 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3086 return Match_InvalidOperand; 3087 3088 if ((TSFlags & SIInstrFlags::VOP3) && 3089 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3090 getForcedEncodingSize() != 64) 3091 return Match_PreferE32; 3092 3093 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3094 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3095 // v_mac_f32/16 allow only dst_sel == DWORD; 3096 auto OpNum = 3097 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3098 const auto &Op = Inst.getOperand(OpNum); 3099 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3100 return Match_InvalidOperand; 3101 } 3102 } 3103 3104 return Match_Success; 3105 } 3106 3107 static ArrayRef<unsigned> getAllVariants() { 3108 static const unsigned Variants[] = { 3109 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3110 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3111 }; 3112 3113 return makeArrayRef(Variants); 3114 } 3115 3116 // What asm variants we should check 3117 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3118 if (getForcedEncodingSize() == 32) { 3119 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3120 return makeArrayRef(Variants); 3121 } 3122 3123 if (isForcedVOP3()) { 3124 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3125 return makeArrayRef(Variants); 3126 } 3127 3128 if (isForcedSDWA()) { 3129 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3130 AMDGPUAsmVariants::SDWA9}; 3131 return makeArrayRef(Variants); 3132 } 3133 3134 if (isForcedDPP()) { 3135 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3136 return makeArrayRef(Variants); 3137 } 3138 3139 return getAllVariants(); 3140 } 3141 3142 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3143 if (getForcedEncodingSize() == 32) 3144 return "e32"; 3145 3146 if (isForcedVOP3()) 3147 return "e64"; 3148 3149 if (isForcedSDWA()) 3150 return "sdwa"; 3151 3152 if (isForcedDPP()) 3153 return "dpp"; 3154 3155 return ""; 3156 } 3157 3158 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3159 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3160 const unsigned Num = Desc.getNumImplicitUses(); 3161 for (unsigned i = 0; i < Num; ++i) { 3162 unsigned Reg = Desc.ImplicitUses[i]; 3163 switch (Reg) { 3164 case AMDGPU::FLAT_SCR: 3165 case AMDGPU::VCC: 3166 case AMDGPU::VCC_LO: 3167 case AMDGPU::VCC_HI: 3168 case AMDGPU::M0: 3169 return Reg; 3170 default: 3171 break; 3172 } 3173 } 3174 return AMDGPU::NoRegister; 3175 } 3176 3177 // NB: This code is correct only when used to check constant 3178 // bus limitations because GFX7 support no f16 inline constants. 3179 // Note that there are no cases when a GFX7 opcode violates 3180 // constant bus limitations due to the use of an f16 constant. 
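// For example, for a 32-bit operand the immediate 64 is an inline constant,
// while 65 must be encoded as a literal and is counted against the constant
// bus limit by the checks below (illustrative note).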
3181 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3182 unsigned OpIdx) const { 3183 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3184 3185 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3186 return false; 3187 } 3188 3189 const MCOperand &MO = Inst.getOperand(OpIdx); 3190 3191 int64_t Val = MO.getImm(); 3192 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3193 3194 switch (OpSize) { // expected operand size 3195 case 8: 3196 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3197 case 4: 3198 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3199 case 2: { 3200 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3201 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3202 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3203 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3204 return AMDGPU::isInlinableIntLiteral(Val); 3205 3206 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3207 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3208 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3209 return AMDGPU::isInlinableIntLiteralV216(Val); 3210 3211 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3212 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3213 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3214 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3215 3216 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3217 } 3218 default: 3219 llvm_unreachable("invalid operand size"); 3220 } 3221 } 3222 3223 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3224 if (!isGFX10Plus()) 3225 return 1; 3226 3227 switch (Opcode) { 3228 // 64-bit shift instructions can use only one scalar value input 3229 case AMDGPU::V_LSHLREV_B64_e64: 3230 case AMDGPU::V_LSHLREV_B64_gfx10: 3231 case AMDGPU::V_LSHRREV_B64_e64: 3232 case AMDGPU::V_LSHRREV_B64_gfx10: 3233 case AMDGPU::V_ASHRREV_I64_e64: 3234 case AMDGPU::V_ASHRREV_I64_gfx10: 3235 case AMDGPU::V_LSHL_B64_e64: 3236 case AMDGPU::V_LSHR_B64_e64: 3237 case AMDGPU::V_ASHR_I64_e64: 3238 return 1; 3239 default: 3240 return 2; 3241 } 3242 } 3243 3244 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3245 const MCOperand &MO = Inst.getOperand(OpIdx); 3246 if (MO.isImm()) { 3247 return !isInlineConstant(Inst, OpIdx); 3248 } else if (MO.isReg()) { 3249 auto Reg = MO.getReg(); 3250 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3251 auto PReg = mc2PseudoReg(Reg); 3252 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3253 } else { 3254 return true; 3255 } 3256 } 3257 3258 bool 3259 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3260 const OperandVector &Operands) { 3261 const unsigned Opcode = Inst.getOpcode(); 3262 const MCInstrDesc &Desc = MII.get(Opcode); 3263 unsigned LastSGPR = AMDGPU::NoRegister; 3264 unsigned ConstantBusUseCount = 0; 3265 unsigned NumLiterals = 0; 3266 unsigned LiteralSize; 3267 3268 if (Desc.TSFlags & 3269 (SIInstrFlags::VOPC | 3270 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3271 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3272 SIInstrFlags::SDWA)) { 3273 // Check special imm operands (used by madmk, etc) 3274 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3275 ++NumLiterals; 3276 LiteralSize = 4; 3277 } 3278 3279 SmallDenseSet<unsigned> SGPRsUsed; 3280 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3281 if (SGPRUsed != AMDGPU::NoRegister) { 3282 SGPRsUsed.insert(SGPRUsed); 3283 ++ConstantBusUseCount; 3284 } 3285 3286 
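// E.g. on subtargets where getConstantBusLimit() is 1, an instruction such as
// "v_add_f32 v0, s0, s1" (two distinct SGPR sources) is rejected below
// (illustrative example).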
const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3287 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3288 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3289
3290 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3291
3292 for (int OpIdx : OpIndices) {
3293 if (OpIdx == -1) break;
3294
3295 const MCOperand &MO = Inst.getOperand(OpIdx);
3296 if (usesConstantBus(Inst, OpIdx)) {
3297 if (MO.isReg()) {
3298 LastSGPR = mc2PseudoReg(MO.getReg());
3299 // Pairs of registers with a partial intersection like these
3300 // s0, s[0:1]
3301 // flat_scratch_lo, flat_scratch
3302 // flat_scratch_lo, flat_scratch_hi
3303 // are theoretically valid but they are disabled anyway.
3304 // Note that this code mimics SIInstrInfo::verifyInstruction
3305 if (!SGPRsUsed.count(LastSGPR)) {
3306 SGPRsUsed.insert(LastSGPR);
3307 ++ConstantBusUseCount;
3308 }
3309 } else { // Expression or a literal
3310
3311 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3312 continue; // special operand like VINTERP attr_chan
3313
3314 // An instruction may use only one literal.
3315 // This has been validated in a previous step.
3316 // See validateVOPLiteral.
3317 // This literal may be used as more than one operand.
3318 // If all these operands are of the same size,
3319 // this literal counts as one scalar value.
3320 // Otherwise it counts as 2 scalar values.
3321 // See "GFX10 Shader Programming", section 3.6.2.3.
3322
3323 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3324 if (Size < 4) Size = 4;
3325
3326 if (NumLiterals == 0) {
3327 NumLiterals = 1;
3328 LiteralSize = Size;
3329 } else if (LiteralSize != Size) {
3330 NumLiterals = 2;
3331 }
3332 }
3333 }
3334 }
3335 }
3336 ConstantBusUseCount += NumLiterals;
3337
3338 if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3339 return true;
3340
3341 SMLoc LitLoc = getLitLoc(Operands);
3342 SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3343 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3344 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3345 return false; 3346 } 3347 3348 bool 3349 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3350 const OperandVector &Operands) { 3351 const unsigned Opcode = Inst.getOpcode(); 3352 const MCInstrDesc &Desc = MII.get(Opcode); 3353 3354 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3355 if (DstIdx == -1 || 3356 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3357 return true; 3358 } 3359 3360 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3361 3362 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3363 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3364 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3365 3366 assert(DstIdx != -1); 3367 const MCOperand &Dst = Inst.getOperand(DstIdx); 3368 assert(Dst.isReg()); 3369 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3370 3371 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3372 3373 for (int SrcIdx : SrcIndices) { 3374 if (SrcIdx == -1) break; 3375 const MCOperand &Src = Inst.getOperand(SrcIdx); 3376 if (Src.isReg()) { 3377 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3378 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3379 Error(getRegLoc(SrcReg, Operands), 3380 "destination must be different than all sources"); 3381 return false; 3382 } 3383 } 3384 } 3385 3386 return true; 3387 } 3388 3389 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3390 3391 const unsigned Opc = Inst.getOpcode(); 3392 const MCInstrDesc &Desc = MII.get(Opc); 3393 3394 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3395 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3396 assert(ClampIdx != -1); 3397 return Inst.getOperand(ClampIdx).getImm() == 0; 3398 } 3399 3400 return true; 3401 } 3402 3403 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3404 3405 const unsigned Opc = Inst.getOpcode(); 3406 const MCInstrDesc &Desc = MII.get(Opc); 3407 3408 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3409 return true; 3410 3411 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3412 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3413 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3414 3415 assert(VDataIdx != -1); 3416 3417 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3418 return true; 3419 3420 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3421 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3422 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3423 if (DMask == 0) 3424 DMask = 1; 3425 3426 unsigned DataSize = 3427 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3428 if (hasPackedD16()) { 3429 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3430 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3431 DataSize = (DataSize + 1) / 2; 3432 } 3433 3434 return (VDataSize / 4) == DataSize + TFESize; 3435 } 3436 3437 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3438 const unsigned Opc = Inst.getOpcode(); 3439 const MCInstrDesc &Desc = MII.get(Opc); 3440 3441 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3442 return true; 3443 3444 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3445 3446 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3447 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3448 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3449 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3450 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3451 int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16); 3452 3453 assert(VAddr0Idx != -1); 3454 assert(SrsrcIdx != -1); 3455 assert(SrsrcIdx > VAddr0Idx); 3456 3457 if (DimIdx == -1) 3458 return true; // intersect_ray 3459 3460 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3461 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3462 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3463 unsigned ActualAddrSize = 3464 IsNSA ? SrsrcIdx - VAddr0Idx 3465 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3466 bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm()); 3467 3468 unsigned ExpectedAddrSize = 3469 AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16()); 3470 3471 if (!IsNSA) { 3472 if (ExpectedAddrSize > 8) 3473 ExpectedAddrSize = 16; 3474 3475 // Allow oversized 8 VGPR vaddr when only 5/6/7 VGPRs are required. 3476 // This provides backward compatibility for assembly created 3477 // before 160b/192b/224b types were directly supported. 3478 if (ActualAddrSize == 8 && (ExpectedAddrSize >= 5 && ExpectedAddrSize <= 7)) 3479 return true; 3480 } 3481 3482 return ActualAddrSize == ExpectedAddrSize; 3483 } 3484 3485 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3486 3487 const unsigned Opc = Inst.getOpcode(); 3488 const MCInstrDesc &Desc = MII.get(Opc); 3489 3490 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3491 return true; 3492 if (!Desc.mayLoad() || !Desc.mayStore()) 3493 return true; // Not atomic 3494 3495 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3496 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3497 3498 // This is an incomplete check because image_atomic_cmpswap 3499 // may only use 0x3 and 0xf while other atomic operations 3500 // may use 0x1 and 0x3. However these limitations are 3501 // verified when we check that dmask matches dst size. 3502 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3503 } 3504 3505 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3506 3507 const unsigned Opc = Inst.getOpcode(); 3508 const MCInstrDesc &Desc = MII.get(Opc); 3509 3510 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3511 return true; 3512 3513 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3514 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3515 3516 // GATHER4 instructions use dmask in a different fashion compared to 3517 // other MIMG instructions. The only useful DMASK values are 3518 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3519 // (red,red,red,red) etc.) 
The ISA document doesn't mention 3520 // this. 3521 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3522 } 3523 3524 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3525 const unsigned Opc = Inst.getOpcode(); 3526 const MCInstrDesc &Desc = MII.get(Opc); 3527 3528 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3529 return true; 3530 3531 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3532 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3533 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3534 3535 if (!BaseOpcode->MSAA) 3536 return true; 3537 3538 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3539 assert(DimIdx != -1); 3540 3541 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3542 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3543 3544 return DimInfo->MSAA; 3545 } 3546 3547 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3548 { 3549 switch (Opcode) { 3550 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3551 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3552 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3553 return true; 3554 default: 3555 return false; 3556 } 3557 } 3558 3559 // movrels* opcodes should only allow VGPRS as src0. 3560 // This is specified in .td description for vop1/vop3, 3561 // but sdwa is handled differently. See isSDWAOperand. 3562 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3563 const OperandVector &Operands) { 3564 3565 const unsigned Opc = Inst.getOpcode(); 3566 const MCInstrDesc &Desc = MII.get(Opc); 3567 3568 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3569 return true; 3570 3571 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3572 assert(Src0Idx != -1); 3573 3574 SMLoc ErrLoc; 3575 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3576 if (Src0.isReg()) { 3577 auto Reg = mc2PseudoReg(Src0.getReg()); 3578 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3579 if (!isSGPR(Reg, TRI)) 3580 return true; 3581 ErrLoc = getRegLoc(Reg, Operands); 3582 } else { 3583 ErrLoc = getConstLoc(Operands); 3584 } 3585 3586 Error(ErrLoc, "source operand must be a VGPR"); 3587 return false; 3588 } 3589 3590 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3591 const OperandVector &Operands) { 3592 3593 const unsigned Opc = Inst.getOpcode(); 3594 3595 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3596 return true; 3597 3598 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3599 assert(Src0Idx != -1); 3600 3601 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3602 if (!Src0.isReg()) 3603 return true; 3604 3605 auto Reg = mc2PseudoReg(Src0.getReg()); 3606 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3607 if (isSGPR(Reg, TRI)) { 3608 Error(getRegLoc(Reg, Operands), 3609 "source operand must be either a VGPR or an inline constant"); 3610 return false; 3611 } 3612 3613 return true; 3614 } 3615 3616 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3617 switch (Inst.getOpcode()) { 3618 default: 3619 return true; 3620 case V_DIV_SCALE_F32_gfx6_gfx7: 3621 case V_DIV_SCALE_F32_vi: 3622 case V_DIV_SCALE_F32_gfx10: 3623 case V_DIV_SCALE_F64_gfx6_gfx7: 3624 case V_DIV_SCALE_F64_vi: 3625 case V_DIV_SCALE_F64_gfx10: 3626 break; 3627 } 3628 3629 // TODO: Check that src0 = src1 or src2. 
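// The loop below rejects sources that carry the ABS modifier, e.g.
// "v_div_scale_f32 v0, vcc, |v1|, v2, v3" would fail this check
// (illustrative example).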
3630 3631 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3632 AMDGPU::OpName::src1_modifiers, 3633 AMDGPU::OpName::src2_modifiers}) { 3634 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3635 .getImm() & 3636 SISrcMods::ABS) { 3637 return false; 3638 } 3639 } 3640 3641 return true; 3642 } 3643 3644 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3645 3646 const unsigned Opc = Inst.getOpcode(); 3647 const MCInstrDesc &Desc = MII.get(Opc); 3648 3649 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3650 return true; 3651 3652 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3653 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3654 if (isCI() || isSI()) 3655 return false; 3656 } 3657 3658 return true; 3659 } 3660 3661 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3662 const unsigned Opc = Inst.getOpcode(); 3663 const MCInstrDesc &Desc = MII.get(Opc); 3664 3665 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3666 return true; 3667 3668 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3669 if (DimIdx < 0) 3670 return true; 3671 3672 long Imm = Inst.getOperand(DimIdx).getImm(); 3673 if (Imm < 0 || Imm >= 8) 3674 return false; 3675 3676 return true; 3677 } 3678 3679 static bool IsRevOpcode(const unsigned Opcode) 3680 { 3681 switch (Opcode) { 3682 case AMDGPU::V_SUBREV_F32_e32: 3683 case AMDGPU::V_SUBREV_F32_e64: 3684 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3685 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3686 case AMDGPU::V_SUBREV_F32_e32_vi: 3687 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3688 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3689 case AMDGPU::V_SUBREV_F32_e64_vi: 3690 3691 case AMDGPU::V_SUBREV_CO_U32_e32: 3692 case AMDGPU::V_SUBREV_CO_U32_e64: 3693 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3694 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3695 3696 case AMDGPU::V_SUBBREV_U32_e32: 3697 case AMDGPU::V_SUBBREV_U32_e64: 3698 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3699 case AMDGPU::V_SUBBREV_U32_e32_vi: 3700 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3701 case AMDGPU::V_SUBBREV_U32_e64_vi: 3702 3703 case AMDGPU::V_SUBREV_U32_e32: 3704 case AMDGPU::V_SUBREV_U32_e64: 3705 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3706 case AMDGPU::V_SUBREV_U32_e32_vi: 3707 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3708 case AMDGPU::V_SUBREV_U32_e64_vi: 3709 3710 case AMDGPU::V_SUBREV_F16_e32: 3711 case AMDGPU::V_SUBREV_F16_e64: 3712 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3713 case AMDGPU::V_SUBREV_F16_e32_vi: 3714 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3715 case AMDGPU::V_SUBREV_F16_e64_vi: 3716 3717 case AMDGPU::V_SUBREV_U16_e32: 3718 case AMDGPU::V_SUBREV_U16_e64: 3719 case AMDGPU::V_SUBREV_U16_e32_vi: 3720 case AMDGPU::V_SUBREV_U16_e64_vi: 3721 3722 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3723 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3724 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3725 3726 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3727 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3728 3729 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3730 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3731 3732 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3733 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3734 3735 case AMDGPU::V_LSHRREV_B32_e32: 3736 case AMDGPU::V_LSHRREV_B32_e64: 3737 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3738 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3739 case AMDGPU::V_LSHRREV_B32_e32_vi: 3740 case AMDGPU::V_LSHRREV_B32_e64_vi: 3741 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3742 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3743 3744 case AMDGPU::V_ASHRREV_I32_e32: 3745 case
AMDGPU::V_ASHRREV_I32_e64: 3746 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3747 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3748 case AMDGPU::V_ASHRREV_I32_e32_vi: 3749 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3750 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3751 case AMDGPU::V_ASHRREV_I32_e64_vi: 3752 3753 case AMDGPU::V_LSHLREV_B32_e32: 3754 case AMDGPU::V_LSHLREV_B32_e64: 3755 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3756 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3757 case AMDGPU::V_LSHLREV_B32_e32_vi: 3758 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3759 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3760 case AMDGPU::V_LSHLREV_B32_e64_vi: 3761 3762 case AMDGPU::V_LSHLREV_B16_e32: 3763 case AMDGPU::V_LSHLREV_B16_e64: 3764 case AMDGPU::V_LSHLREV_B16_e32_vi: 3765 case AMDGPU::V_LSHLREV_B16_e64_vi: 3766 case AMDGPU::V_LSHLREV_B16_gfx10: 3767 3768 case AMDGPU::V_LSHRREV_B16_e32: 3769 case AMDGPU::V_LSHRREV_B16_e64: 3770 case AMDGPU::V_LSHRREV_B16_e32_vi: 3771 case AMDGPU::V_LSHRREV_B16_e64_vi: 3772 case AMDGPU::V_LSHRREV_B16_gfx10: 3773 3774 case AMDGPU::V_ASHRREV_I16_e32: 3775 case AMDGPU::V_ASHRREV_I16_e64: 3776 case AMDGPU::V_ASHRREV_I16_e32_vi: 3777 case AMDGPU::V_ASHRREV_I16_e64_vi: 3778 case AMDGPU::V_ASHRREV_I16_gfx10: 3779 3780 case AMDGPU::V_LSHLREV_B64_e64: 3781 case AMDGPU::V_LSHLREV_B64_gfx10: 3782 case AMDGPU::V_LSHLREV_B64_vi: 3783 3784 case AMDGPU::V_LSHRREV_B64_e64: 3785 case AMDGPU::V_LSHRREV_B64_gfx10: 3786 case AMDGPU::V_LSHRREV_B64_vi: 3787 3788 case AMDGPU::V_ASHRREV_I64_e64: 3789 case AMDGPU::V_ASHRREV_I64_gfx10: 3790 case AMDGPU::V_ASHRREV_I64_vi: 3791 3792 case AMDGPU::V_PK_LSHLREV_B16: 3793 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3794 case AMDGPU::V_PK_LSHLREV_B16_vi: 3795 3796 case AMDGPU::V_PK_LSHRREV_B16: 3797 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3798 case AMDGPU::V_PK_LSHRREV_B16_vi: 3799 case AMDGPU::V_PK_ASHRREV_I16: 3800 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3801 case AMDGPU::V_PK_ASHRREV_I16_vi: 3802 return true; 3803 default: 3804 return false; 3805 } 3806 } 3807 3808 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3809 3810 using namespace SIInstrFlags; 3811 const unsigned Opcode = Inst.getOpcode(); 3812 const MCInstrDesc &Desc = MII.get(Opcode); 3813 3814 // lds_direct register is defined so that it can be used 3815 // with 9-bit operands only. Ignore encodings which do not accept these. 
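// In addition, the checks below reject lds_direct on GFX90A, with *rev* and
// SDWA opcodes, and when it appears in any source position other than src0.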
3816 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3817 if ((Desc.TSFlags & Enc) == 0) 3818 return None; 3819 3820 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3821 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3822 if (SrcIdx == -1) 3823 break; 3824 const auto &Src = Inst.getOperand(SrcIdx); 3825 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3826 3827 if (isGFX90A()) 3828 return StringRef("lds_direct is not supported on this GPU"); 3829 3830 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3831 return StringRef("lds_direct cannot be used with this instruction"); 3832 3833 if (SrcName != OpName::src0) 3834 return StringRef("lds_direct may be used as src0 only"); 3835 } 3836 } 3837 3838 return None; 3839 } 3840 3841 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3842 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3843 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3844 if (Op.isFlatOffset()) 3845 return Op.getStartLoc(); 3846 } 3847 return getLoc(); 3848 } 3849 3850 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3851 const OperandVector &Operands) { 3852 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3853 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3854 return true; 3855 3856 auto Opcode = Inst.getOpcode(); 3857 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3858 assert(OpNum != -1); 3859 3860 const auto &Op = Inst.getOperand(OpNum); 3861 if (!hasFlatOffsets() && Op.getImm() != 0) { 3862 Error(getFlatOffsetLoc(Operands), 3863 "flat offset modifier is not supported on this GPU"); 3864 return false; 3865 } 3866 3867 // For FLAT segment the offset must be positive; 3868 // MSB is ignored and forced to zero. 3869 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3870 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3871 if (!isIntN(OffsetSize, Op.getImm())) { 3872 Error(getFlatOffsetLoc(Operands), 3873 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3874 return false; 3875 } 3876 } else { 3877 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3878 if (!isUIntN(OffsetSize, Op.getImm())) { 3879 Error(getFlatOffsetLoc(Operands), 3880 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3881 return false; 3882 } 3883 } 3884 3885 return true; 3886 } 3887 3888 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3889 // Start with second operand because SMEM Offset cannot be dst or src0. 
3890 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3891 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3892 if (Op.isSMEMOffset()) 3893 return Op.getStartLoc(); 3894 } 3895 return getLoc(); 3896 } 3897 3898 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3899 const OperandVector &Operands) { 3900 if (isCI() || isSI()) 3901 return true; 3902 3903 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3904 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3905 return true; 3906 3907 auto Opcode = Inst.getOpcode(); 3908 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3909 if (OpNum == -1) 3910 return true; 3911 3912 const auto &Op = Inst.getOperand(OpNum); 3913 if (!Op.isImm()) 3914 return true; 3915 3916 uint64_t Offset = Op.getImm(); 3917 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3918 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3919 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3920 return true; 3921 3922 Error(getSMEMOffsetLoc(Operands), 3923 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3924 "expected a 21-bit signed offset"); 3925 3926 return false; 3927 } 3928 3929 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3930 unsigned Opcode = Inst.getOpcode(); 3931 const MCInstrDesc &Desc = MII.get(Opcode); 3932 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3933 return true; 3934 3935 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3936 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3937 3938 const int OpIndices[] = { Src0Idx, Src1Idx }; 3939 3940 unsigned NumExprs = 0; 3941 unsigned NumLiterals = 0; 3942 uint32_t LiteralValue; 3943 3944 for (int OpIdx : OpIndices) { 3945 if (OpIdx == -1) break; 3946 3947 const MCOperand &MO = Inst.getOperand(OpIdx); 3948 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3949 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3950 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3951 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3952 if (NumLiterals == 0 || LiteralValue != Value) { 3953 LiteralValue = Value; 3954 ++NumLiterals; 3955 } 3956 } else if (MO.isExpr()) { 3957 ++NumExprs; 3958 } 3959 } 3960 } 3961 3962 return NumLiterals + NumExprs <= 1; 3963 } 3964 3965 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3966 const unsigned Opc = Inst.getOpcode(); 3967 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3968 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3969 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3970 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3971 3972 if (OpSel & ~3) 3973 return false; 3974 } 3975 return true; 3976 } 3977 3978 bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, 3979 const OperandVector &Operands) { 3980 const unsigned Opc = Inst.getOpcode(); 3981 int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl); 3982 if (DppCtrlIdx < 0) 3983 return true; 3984 unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm(); 3985 3986 if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) { 3987 // DPP64 is supported for row_newbcast only. 
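// An illegal 64-bit dpp_ctrl value is only reported below when src0 is
// actually a 64-bit register, i.e. it has a sub1 sub-register; 32-bit DPP
// operands are left alone by this check.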
3988 int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3989 if (Src0Idx >= 0 && 3990 getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) { 3991 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands); 3992 Error(S, "64 bit dpp only supports row_newbcast"); 3993 return false; 3994 } 3995 } 3996 3997 return true; 3998 } 3999 4000 // Check if VCC register matches wavefront size 4001 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 4002 auto FB = getFeatureBits(); 4003 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 4004 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 4005 } 4006 4007 // One unique literal can be used. VOP3 literal is only allowed in GFX10+ 4008 bool AMDGPUAsmParser::validateVOPLiteral(const MCInst &Inst, 4009 const OperandVector &Operands) { 4010 unsigned Opcode = Inst.getOpcode(); 4011 const MCInstrDesc &Desc = MII.get(Opcode); 4012 const int ImmIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm); 4013 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)) && 4014 ImmIdx == -1) 4015 return true; 4016 4017 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 4018 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 4019 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 4020 4021 const int OpIndices[] = {Src0Idx, Src1Idx, Src2Idx, ImmIdx}; 4022 4023 unsigned NumExprs = 0; 4024 unsigned NumLiterals = 0; 4025 uint32_t LiteralValue; 4026 4027 for (int OpIdx : OpIndices) { 4028 if (OpIdx == -1) 4029 continue; 4030 4031 const MCOperand &MO = Inst.getOperand(OpIdx); 4032 if (!MO.isImm() && !MO.isExpr()) 4033 continue; 4034 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 4035 continue; 4036 4037 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 4038 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 4039 Error(getConstLoc(Operands), 4040 "inline constants are not allowed for this operand"); 4041 return false; 4042 } 4043 4044 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 4045 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 4046 if (NumLiterals == 0 || LiteralValue != Value) { 4047 LiteralValue = Value; 4048 ++NumLiterals; 4049 } 4050 } else if (MO.isExpr()) { 4051 ++NumExprs; 4052 } 4053 } 4054 NumLiterals += NumExprs; 4055 4056 if (!NumLiterals) 4057 return true; 4058 4059 if (ImmIdx == -1 && !getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4060 Error(getLitLoc(Operands), "literal operands are not supported"); 4061 return false; 4062 } 4063 4064 if (NumLiterals > 1) { 4065 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4066 return false; 4067 } 4068 4069 return true; 4070 } 4071 4072 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4073 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4074 const MCRegisterInfo *MRI) { 4075 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4076 if (OpIdx < 0) 4077 return -1; 4078 4079 const MCOperand &Op = Inst.getOperand(OpIdx); 4080 if (!Op.isReg()) 4081 return -1; 4082 4083 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4084 auto Reg = Sub ? Sub : Op.getReg(); 4085 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4086 return AGPR32.contains(Reg) ? 
1 : 0; 4087 } 4088 4089 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4090 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4091 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4092 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4093 SIInstrFlags::DS)) == 0) 4094 return true; 4095 4096 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4097 : AMDGPU::OpName::vdata; 4098 4099 const MCRegisterInfo *MRI = getMRI(); 4100 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4101 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4102 4103 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4104 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4105 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4106 return false; 4107 } 4108 4109 auto FB = getFeatureBits(); 4110 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4111 if (DataAreg < 0 || DstAreg < 0) 4112 return true; 4113 return DstAreg == DataAreg; 4114 } 4115 4116 return DstAreg < 1 && DataAreg < 1; 4117 } 4118 4119 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4120 auto FB = getFeatureBits(); 4121 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4122 return true; 4123 4124 const MCRegisterInfo *MRI = getMRI(); 4125 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4126 const MCRegisterClass &AGPR32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4127 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4128 const MCOperand &Op = Inst.getOperand(I); 4129 if (!Op.isReg()) 4130 continue; 4131 4132 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4133 if (!Sub) 4134 continue; 4135 4136 if (VGPR32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4137 return false; 4138 if (AGPR32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4139 return false; 4140 } 4141 4142 return true; 4143 } 4144 4145 // gfx90a has an undocumented limitation: 4146 // DS_GWS opcodes must use even aligned registers. 4147 bool AMDGPUAsmParser::validateGWS(const MCInst &Inst, 4148 const OperandVector &Operands) { 4149 if (!getFeatureBits()[AMDGPU::FeatureGFX90AInsts]) 4150 return true; 4151 4152 int Opc = Inst.getOpcode(); 4153 if (Opc != AMDGPU::DS_GWS_INIT_vi && Opc != AMDGPU::DS_GWS_BARRIER_vi && 4154 Opc != AMDGPU::DS_GWS_SEMA_BR_vi) 4155 return true; 4156 4157 const MCRegisterInfo *MRI = getMRI(); 4158 const MCRegisterClass &VGPR32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4159 int Data0Pos = 4160 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0); 4161 assert(Data0Pos != -1); 4162 auto Reg = Inst.getOperand(Data0Pos).getReg(); 4163 auto RegIdx = Reg - (VGPR32.contains(Reg) ? 
AMDGPU::VGPR0 : AMDGPU::AGPR0); 4164 if (RegIdx & 1) { 4165 SMLoc RegLoc = getRegLoc(Reg, Operands); 4166 Error(RegLoc, "vgpr must be even aligned"); 4167 return false; 4168 } 4169 4170 return true; 4171 } 4172 4173 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4174 const OperandVector &Operands, 4175 const SMLoc &IDLoc) { 4176 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4177 AMDGPU::OpName::cpol); 4178 if (CPolPos == -1) 4179 return true; 4180 4181 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4182 4183 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4184 if ((TSFlags & (SIInstrFlags::SMRD)) && 4185 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4186 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4187 return false; 4188 } 4189 4190 if (isGFX90A() && (CPol & CPol::SCC)) { 4191 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4192 StringRef CStr(S.getPointer()); 4193 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4194 Error(S, "scc is not supported on this GPU"); 4195 return false; 4196 } 4197 4198 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4199 return true; 4200 4201 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4202 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4203 Error(IDLoc, "instruction must use glc"); 4204 return false; 4205 } 4206 } else { 4207 if (CPol & CPol::GLC) { 4208 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4209 StringRef CStr(S.getPointer()); 4210 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4211 Error(S, "instruction must not use glc"); 4212 return false; 4213 } 4214 } 4215 4216 return true; 4217 } 4218 4219 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4220 const SMLoc &IDLoc, 4221 const OperandVector &Operands) { 4222 if (auto ErrMsg = validateLdsDirect(Inst)) { 4223 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4224 return false; 4225 } 4226 if (!validateSOPLiteral(Inst)) { 4227 Error(getLitLoc(Operands), 4228 "only one literal operand is allowed"); 4229 return false; 4230 } 4231 if (!validateVOPLiteral(Inst, Operands)) { 4232 return false; 4233 } 4234 if (!validateConstantBusLimitations(Inst, Operands)) { 4235 return false; 4236 } 4237 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4238 return false; 4239 } 4240 if (!validateIntClampSupported(Inst)) { 4241 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4242 "integer clamping is not supported on this GPU"); 4243 return false; 4244 } 4245 if (!validateOpSel(Inst)) { 4246 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4247 "invalid op_sel operand"); 4248 return false; 4249 } 4250 if (!validateDPP(Inst, Operands)) { 4251 return false; 4252 } 4253 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4254 if (!validateMIMGD16(Inst)) { 4255 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4256 "d16 modifier is not supported on this GPU"); 4257 return false; 4258 } 4259 if (!validateMIMGDim(Inst)) { 4260 Error(IDLoc, "dim modifier is required on this GPU"); 4261 return false; 4262 } 4263 if (!validateMIMGMSAA(Inst)) { 4264 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4265 "invalid dim; must be MSAA type"); 4266 return false; 4267 } 4268 if (!validateMIMGDataSize(Inst)) { 4269 Error(IDLoc, 4270 "image data size does not match dmask and tfe"); 4271 return false; 4272 } 4273 if (!validateMIMGAddrSize(Inst)) { 4274 Error(IDLoc, 4275 "image address size does not match dim and a16"); 4276 return false; 4277 } 4278 if (!validateMIMGAtomicDMask(Inst)) { 4279 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4280 "invalid atomic image dmask"); 4281 return false; 4282 } 4283 if (!validateMIMGGatherDMask(Inst)) { 4284 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4285 "invalid image_gather dmask: only one bit must be set"); 4286 return false; 4287 } 4288 if (!validateMovrels(Inst, Operands)) { 4289 return false; 4290 } 4291 if (!validateFlatOffset(Inst, Operands)) { 4292 return false; 4293 } 4294 if (!validateSMEMOffset(Inst, Operands)) { 4295 return false; 4296 } 4297 if (!validateMAIAccWrite(Inst, Operands)) { 4298 return false; 4299 } 4300 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4301 return false; 4302 } 4303 4304 if (!validateAGPRLdSt(Inst)) { 4305 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4306 ? "invalid register class: data and dst should be all VGPR or AGPR" 4307 : "invalid register class: agpr loads and stores not supported on this GPU" 4308 ); 4309 return false; 4310 } 4311 if (!validateVGPRAlign(Inst)) { 4312 Error(IDLoc, 4313 "invalid register class: vgpr tuples must be 64 bit aligned"); 4314 return false; 4315 } 4316 if (!validateGWS(Inst, Operands)) { 4317 return false; 4318 } 4319 4320 if (!validateDivScale(Inst)) { 4321 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4322 return false; 4323 } 4324 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4325 return false; 4326 } 4327 4328 return true; 4329 } 4330 4331 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4332 const FeatureBitset &FBS, 4333 unsigned VariantID = 0); 4334 4335 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4336 const FeatureBitset &AvailableFeatures, 4337 unsigned VariantID); 4338 4339 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4340 const FeatureBitset &FBS) { 4341 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4342 } 4343 4344 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4345 const FeatureBitset &FBS, 4346 ArrayRef<unsigned> Variants) { 4347 for (auto Variant : Variants) { 4348 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4349 return true; 4350 } 4351 4352 return false; 4353 } 4354 4355 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4356 const SMLoc &IDLoc) { 4357 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4358 4359 // Check if requested instruction variant is supported. 4360 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4361 return false; 4362 4363 // This instruction is not supported. 4364 // Clear any other pending errors because they are no longer relevant. 4365 getParser().clearPendingErrors(); 4366 4367 // Requested instruction variant is not supported. 4368 // Check if any other variants are supported. 
4369 StringRef VariantName = getMatchedVariantName(); 4370 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4371 return Error(IDLoc, 4372 Twine(VariantName, 4373 " variant of this instruction is not supported")); 4374 } 4375 4376 // Finally check if this instruction is supported on any other GPU. 4377 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4378 return Error(IDLoc, "instruction not supported on this GPU"); 4379 } 4380 4381 // Instruction not supported on any GPU. Probably a typo. 4382 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4383 return Error(IDLoc, "invalid instruction" + Suggestion); 4384 } 4385 4386 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4387 OperandVector &Operands, 4388 MCStreamer &Out, 4389 uint64_t &ErrorInfo, 4390 bool MatchingInlineAsm) { 4391 MCInst Inst; 4392 unsigned Result = Match_Success; 4393 for (auto Variant : getMatchedVariants()) { 4394 uint64_t EI; 4395 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4396 Variant); 4397 // We order match statuses from least to most specific. We use most specific 4398 // status as resulting 4399 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4400 if ((R == Match_Success) || 4401 (R == Match_PreferE32) || 4402 (R == Match_MissingFeature && Result != Match_PreferE32) || 4403 (R == Match_InvalidOperand && Result != Match_MissingFeature 4404 && Result != Match_PreferE32) || 4405 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4406 && Result != Match_MissingFeature 4407 && Result != Match_PreferE32)) { 4408 Result = R; 4409 ErrorInfo = EI; 4410 } 4411 if (R == Match_Success) 4412 break; 4413 } 4414 4415 if (Result == Match_Success) { 4416 if (!validateInstruction(Inst, IDLoc, Operands)) { 4417 return true; 4418 } 4419 Inst.setLoc(IDLoc); 4420 Out.emitInstruction(Inst, getSTI()); 4421 return false; 4422 } 4423 4424 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4425 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4426 return true; 4427 } 4428 4429 switch (Result) { 4430 default: break; 4431 case Match_MissingFeature: 4432 // It has been verified that the specified instruction 4433 // mnemonic is valid. A match was found but it requires 4434 // features which are not supported on this GPU. 
4435 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4436 4437 case Match_InvalidOperand: { 4438 SMLoc ErrorLoc = IDLoc; 4439 if (ErrorInfo != ~0ULL) { 4440 if (ErrorInfo >= Operands.size()) { 4441 return Error(IDLoc, "too few operands for instruction"); 4442 } 4443 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4444 if (ErrorLoc == SMLoc()) 4445 ErrorLoc = IDLoc; 4446 } 4447 return Error(ErrorLoc, "invalid operand for instruction"); 4448 } 4449 4450 case Match_PreferE32: 4451 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4452 "should be encoded as e32"); 4453 case Match_MnemonicFail: 4454 llvm_unreachable("Invalid instructions should have been handled already"); 4455 } 4456 llvm_unreachable("Implement any new match types added!"); 4457 } 4458 4459 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4460 int64_t Tmp = -1; 4461 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4462 return true; 4463 } 4464 if (getParser().parseAbsoluteExpression(Tmp)) { 4465 return true; 4466 } 4467 Ret = static_cast<uint32_t>(Tmp); 4468 return false; 4469 } 4470 4471 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4472 uint32_t &Minor) { 4473 if (ParseAsAbsoluteExpression(Major)) 4474 return TokError("invalid major version"); 4475 4476 if (!trySkipToken(AsmToken::Comma)) 4477 return TokError("minor version number required, comma expected"); 4478 4479 if (ParseAsAbsoluteExpression(Minor)) 4480 return TokError("invalid minor version"); 4481 4482 return false; 4483 } 4484 4485 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4486 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4487 return TokError("directive only supported for amdgcn architecture"); 4488 4489 std::string TargetIDDirective; 4490 SMLoc TargetStart = getTok().getLoc(); 4491 if (getParser().parseEscapedString(TargetIDDirective)) 4492 return true; 4493 4494 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4495 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4496 return getParser().Error(TargetRange.Start, 4497 (Twine(".amdgcn_target directive's target id ") + 4498 Twine(TargetIDDirective) + 4499 Twine(" does not match the specified target id ") + 4500 Twine(getTargetStreamer().getTargetID()->toString())).str()); 4501 4502 return false; 4503 } 4504 4505 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4506 return Error(Range.Start, "value out of range", Range); 4507 } 4508 4509 bool AMDGPUAsmParser::calculateGPRBlocks( 4510 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4511 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4512 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4513 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4514 // TODO(scott.linder): These calculations are duplicated from 4515 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
4516 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4517 4518 unsigned NumVGPRs = NextFreeVGPR; 4519 unsigned NumSGPRs = NextFreeSGPR; 4520 4521 if (Version.Major >= 10) 4522 NumSGPRs = 0; 4523 else { 4524 unsigned MaxAddressableNumSGPRs = 4525 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4526 4527 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4528 NumSGPRs > MaxAddressableNumSGPRs) 4529 return OutOfRangeError(SGPRRange); 4530 4531 NumSGPRs += 4532 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4533 4534 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4535 NumSGPRs > MaxAddressableNumSGPRs) 4536 return OutOfRangeError(SGPRRange); 4537 4538 if (Features.test(FeatureSGPRInitBug)) 4539 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4540 } 4541 4542 VGPRBlocks = 4543 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4544 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4545 4546 return false; 4547 } 4548 4549 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4550 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4551 return TokError("directive only supported for amdgcn architecture"); 4552 4553 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4554 return TokError("directive only supported for amdhsa OS"); 4555 4556 StringRef KernelName; 4557 if (getParser().parseIdentifier(KernelName)) 4558 return true; 4559 4560 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4561 4562 StringSet<> Seen; 4563 4564 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4565 4566 SMRange VGPRRange; 4567 uint64_t NextFreeVGPR = 0; 4568 uint64_t AccumOffset = 0; 4569 SMRange SGPRRange; 4570 uint64_t NextFreeSGPR = 0; 4571 4572 // Count the number of user SGPRs implied from the enabled feature bits. 4573 unsigned ImpliedUserSGPRCount = 0; 4574 4575 // Track if the asm explicitly contains the directive for the user SGPR 4576 // count. 
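// An explicit .amdhsa_user_sgpr_count may be larger than the count implied
// by the enabled user SGPRs, but never smaller; this is enforced once all
// directives have been parsed.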
4577 Optional<unsigned> ExplicitUserSGPRCount; 4578 bool ReserveVCC = true; 4579 bool ReserveFlatScr = true; 4580 Optional<bool> EnableWavefrontSize32; 4581 4582 while (true) { 4583 while (trySkipToken(AsmToken::EndOfStatement)); 4584 4585 StringRef ID; 4586 SMRange IDRange = getTok().getLocRange(); 4587 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4588 return true; 4589 4590 if (ID == ".end_amdhsa_kernel") 4591 break; 4592 4593 if (Seen.find(ID) != Seen.end()) 4594 return TokError(".amdhsa_ directives cannot be repeated"); 4595 Seen.insert(ID); 4596 4597 SMLoc ValStart = getLoc(); 4598 int64_t IVal; 4599 if (getParser().parseAbsoluteExpression(IVal)) 4600 return true; 4601 SMLoc ValEnd = getLoc(); 4602 SMRange ValRange = SMRange(ValStart, ValEnd); 4603 4604 if (IVal < 0) 4605 return OutOfRangeError(ValRange); 4606 4607 uint64_t Val = IVal; 4608 4609 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4610 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4611 return OutOfRangeError(RANGE); \ 4612 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4613 4614 if (ID == ".amdhsa_group_segment_fixed_size") { 4615 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4616 return OutOfRangeError(ValRange); 4617 KD.group_segment_fixed_size = Val; 4618 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4619 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4620 return OutOfRangeError(ValRange); 4621 KD.private_segment_fixed_size = Val; 4622 } else if (ID == ".amdhsa_kernarg_size") { 4623 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4624 return OutOfRangeError(ValRange); 4625 KD.kernarg_size = Val; 4626 } else if (ID == ".amdhsa_user_sgpr_count") { 4627 ExplicitUserSGPRCount = Val; 4628 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4629 if (hasArchitectedFlatScratch()) 4630 return Error(IDRange.Start, 4631 "directive is not supported with architected flat scratch", 4632 IDRange); 4633 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4634 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4635 Val, ValRange); 4636 if (Val) 4637 ImpliedUserSGPRCount += 4; 4638 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4639 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4640 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4641 ValRange); 4642 if (Val) 4643 ImpliedUserSGPRCount += 2; 4644 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4645 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4646 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4647 ValRange); 4648 if (Val) 4649 ImpliedUserSGPRCount += 2; 4650 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4651 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4652 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4653 Val, ValRange); 4654 if (Val) 4655 ImpliedUserSGPRCount += 2; 4656 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4657 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4658 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4659 ValRange); 4660 if (Val) 4661 ImpliedUserSGPRCount += 2; 4662 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4663 if (hasArchitectedFlatScratch()) 4664 return Error(IDRange.Start, 4665 "directive is not supported with architected flat scratch", 4666 IDRange); 4667 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4668 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4669 ValRange); 4670 if (Val) 4671 ImpliedUserSGPRCount += 2; 4672 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4673 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4674 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4675 Val, ValRange); 4676 if (Val) 4677 ImpliedUserSGPRCount += 1; 4678 } else if (ID == ".amdhsa_wavefront_size32") { 4679 if (IVersion.Major < 10) 4680 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4681 EnableWavefrontSize32 = Val; 4682 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4683 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4684 Val, ValRange); 4685 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4686 if (hasArchitectedFlatScratch()) 4687 return Error(IDRange.Start, 4688 "directive is not supported with architected flat scratch", 4689 IDRange); 4690 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4691 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4692 } else if (ID == ".amdhsa_enable_private_segment") { 4693 if (!hasArchitectedFlatScratch()) 4694 return Error( 4695 IDRange.Start, 4696 "directive is not supported without architected flat scratch", 4697 IDRange); 4698 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4699 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange); 4700 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4701 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4702 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4703 ValRange); 4704 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4705 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4706 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4707 ValRange); 4708 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4709 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4710 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4711 ValRange); 4712 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4713 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4714 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4715 ValRange); 4716 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4717 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4718 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4719 ValRange); 4720 } else if (ID == ".amdhsa_next_free_vgpr") { 4721 VGPRRange = ValRange; 4722 NextFreeVGPR = Val; 4723 } else if (ID == ".amdhsa_next_free_sgpr") { 4724 SGPRRange = ValRange; 4725 NextFreeSGPR = Val; 4726 } else if (ID == ".amdhsa_accum_offset") { 4727 if (!isGFX90A()) 4728 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4729 AccumOffset = Val; 4730 } else if (ID == ".amdhsa_reserve_vcc") { 4731 if (!isUInt<1>(Val)) 4732 return OutOfRangeError(ValRange); 4733 ReserveVCC = Val; 4734 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4735 if (IVersion.Major < 7) 4736 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4737 if (hasArchitectedFlatScratch()) 4738 return Error(IDRange.Start, 4739 "directive is not supported with architected flat scratch", 4740 IDRange); 4741 if (!isUInt<1>(Val)) 4742 return OutOfRangeError(ValRange); 4743 ReserveFlatScr = Val; 4744 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4745 if (IVersion.Major < 8) 4746 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4747 if (!isUInt<1>(Val)) 4748 return OutOfRangeError(ValRange); 4749 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4750 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4751 IDRange); 4752 } else if (ID == ".amdhsa_float_round_mode_32") { 4753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4754 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4755 } else if (ID == ".amdhsa_float_round_mode_16_64") { 
4756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4757 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4758 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4759 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4760 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4761 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4763 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4764 ValRange); 4765 } else if (ID == ".amdhsa_dx10_clamp") { 4766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4767 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4768 } else if (ID == ".amdhsa_ieee_mode") { 4769 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4770 Val, ValRange); 4771 } else if (ID == ".amdhsa_fp16_overflow") { 4772 if (IVersion.Major < 9) 4773 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4775 ValRange); 4776 } else if (ID == ".amdhsa_tg_split") { 4777 if (!isGFX90A()) 4778 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4779 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4780 ValRange); 4781 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4782 if (IVersion.Major < 10) 4783 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4784 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4785 ValRange); 4786 } else if (ID == ".amdhsa_memory_ordered") { 4787 if (IVersion.Major < 10) 4788 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4789 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4790 ValRange); 4791 } else if (ID == ".amdhsa_forward_progress") { 4792 if (IVersion.Major < 10) 4793 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4794 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4795 ValRange); 4796 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4797 PARSE_BITS_ENTRY( 4798 KD.compute_pgm_rsrc2, 4799 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4800 ValRange); 4801 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4802 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4803 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4804 Val, ValRange); 4805 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4806 PARSE_BITS_ENTRY( 4807 KD.compute_pgm_rsrc2, 4808 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4809 ValRange); 4810 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4811 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4812 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4813 Val, ValRange); 4814 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4815 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4816 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4817 Val, ValRange); 4818 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4819 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4820 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4821 Val, ValRange); 4822 } else if (ID == ".amdhsa_exception_int_div_zero") { 4823 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4824 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4825 Val, ValRange); 4826 } else { 4827 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4828 } 4829 4830 #undef PARSE_BITS_ENTRY 4831 } 4832 4833 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4834 return 
TokError(".amdhsa_next_free_vgpr directive is required"); 4835 4836 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4837 return TokError(".amdhsa_next_free_sgpr directive is required"); 4838 4839 unsigned VGPRBlocks; 4840 unsigned SGPRBlocks; 4841 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4842 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4843 EnableWavefrontSize32, NextFreeVGPR, 4844 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4845 SGPRBlocks)) 4846 return true; 4847 4848 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4849 VGPRBlocks)) 4850 return OutOfRangeError(VGPRRange); 4851 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4852 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4853 4854 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4855 SGPRBlocks)) 4856 return OutOfRangeError(SGPRRange); 4857 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4858 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4859 SGPRBlocks); 4860 4861 if (ExplicitUserSGPRCount && ImpliedUserSGPRCount > *ExplicitUserSGPRCount) 4862 return TokError("amdgpu_user_sgpr_count smaller than than implied by " 4863 "enabled user SGPRs"); 4864 4865 unsigned UserSGPRCount = 4866 ExplicitUserSGPRCount ? *ExplicitUserSGPRCount : ImpliedUserSGPRCount; 4867 4868 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4869 return TokError("too many user SGPRs enabled"); 4870 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4871 UserSGPRCount); 4872 4873 if (isGFX90A()) { 4874 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4875 return TokError(".amdhsa_accum_offset directive is required"); 4876 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4877 return TokError("accum_offset should be in range [4..256] in " 4878 "increments of 4"); 4879 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4880 return TokError("accum_offset exceeds total VGPR allocation"); 4881 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4882 (AccumOffset / 4 - 1)); 4883 } 4884 4885 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4886 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4887 ReserveFlatScr); 4888 return false; 4889 } 4890 4891 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4892 uint32_t Major; 4893 uint32_t Minor; 4894 4895 if (ParseDirectiveMajorMinor(Major, Minor)) 4896 return true; 4897 4898 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4899 return false; 4900 } 4901 4902 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4903 uint32_t Major; 4904 uint32_t Minor; 4905 uint32_t Stepping; 4906 StringRef VendorName; 4907 StringRef ArchName; 4908 4909 // If this directive has no arguments, then use the ISA version for the 4910 // targeted GPU. 
4911 if (isToken(AsmToken::EndOfStatement)) { 4912 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4913 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4914 ISA.Stepping, 4915 "AMD", "AMDGPU"); 4916 return false; 4917 } 4918 4919 if (ParseDirectiveMajorMinor(Major, Minor)) 4920 return true; 4921 4922 if (!trySkipToken(AsmToken::Comma)) 4923 return TokError("stepping version number required, comma expected"); 4924 4925 if (ParseAsAbsoluteExpression(Stepping)) 4926 return TokError("invalid stepping version"); 4927 4928 if (!trySkipToken(AsmToken::Comma)) 4929 return TokError("vendor name required, comma expected"); 4930 4931 if (!parseString(VendorName, "invalid vendor name")) 4932 return true; 4933 4934 if (!trySkipToken(AsmToken::Comma)) 4935 return TokError("arch name required, comma expected"); 4936 4937 if (!parseString(ArchName, "invalid arch name")) 4938 return true; 4939 4940 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4941 VendorName, ArchName); 4942 return false; 4943 } 4944 4945 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4946 amd_kernel_code_t &Header) { 4947 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4948 // assembly for backwards compatibility. 4949 if (ID == "max_scratch_backing_memory_byte_size") { 4950 Parser.eatToEndOfStatement(); 4951 return false; 4952 } 4953 4954 SmallString<40> ErrStr; 4955 raw_svector_ostream Err(ErrStr); 4956 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4957 return TokError(Err.str()); 4958 } 4959 Lex(); 4960 4961 if (ID == "enable_wavefront_size32") { 4962 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4963 if (!isGFX10Plus()) 4964 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4965 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4966 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4967 } else { 4968 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4969 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4970 } 4971 } 4972 4973 if (ID == "wavefront_size") { 4974 if (Header.wavefront_size == 5) { 4975 if (!isGFX10Plus()) 4976 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4977 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4978 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4979 } else if (Header.wavefront_size == 6) { 4980 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4981 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4982 } 4983 } 4984 4985 if (ID == "enable_wgp_mode") { 4986 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4987 !isGFX10Plus()) 4988 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4989 } 4990 4991 if (ID == "enable_mem_ordered") { 4992 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4993 !isGFX10Plus()) 4994 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4995 } 4996 4997 if (ID == "enable_fwd_progress") { 4998 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4999 !isGFX10Plus()) 5000 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 5001 } 5002 5003 return false; 5004 } 5005 5006 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 5007 amd_kernel_code_t Header; 5008 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 5009 5010 while (true) { 5011 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 5012 // will set the current token to EndOfStatement. 5013 while(trySkipToken(AsmToken::EndOfStatement)); 5014 5015 StringRef ID; 5016 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 5017 return true; 5018 5019 if (ID == ".end_amd_kernel_code_t") 5020 break; 5021 5022 if (ParseAMDKernelCodeTValue(ID, Header)) 5023 return true; 5024 } 5025 5026 getTargetStreamer().EmitAMDKernelCodeT(Header); 5027 5028 return false; 5029 } 5030 5031 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 5032 StringRef KernelName; 5033 if (!parseId(KernelName, "expected symbol name")) 5034 return true; 5035 5036 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 5037 ELF::STT_AMDGPU_HSA_KERNEL); 5038 5039 KernelScope.initialize(getContext()); 5040 return false; 5041 } 5042 5043 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 5044 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 5045 return Error(getLoc(), 5046 ".amd_amdgpu_isa directive is not available on non-amdgcn " 5047 "architectures"); 5048 } 5049 5050 auto TargetIDDirective = getLexer().getTok().getStringContents(); 5051 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 5052 return Error(getParser().getTok().getLoc(), "target id must match options"); 5053 5054 getTargetStreamer().EmitISAVersion(); 5055 Lex(); 5056 5057 return false; 5058 } 5059 5060 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 5061 const char *AssemblerDirectiveBegin; 5062 const char *AssemblerDirectiveEnd; 5063 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 5064 isHsaAbiVersion3Or4(&getSTI()) 5065 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 5066 HSAMD::V3::AssemblerDirectiveEnd) 5067 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 5068 HSAMD::AssemblerDirectiveEnd); 5069 5070 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 5071 return Error(getLoc(), 5072 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 5073 "not available on non-amdhsa OSes")).str()); 5074 } 5075 5076 std::string HSAMetadataString; 5077 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 5078 HSAMetadataString)) 5079 return true; 5080 5081 if (isHsaAbiVersion3Or4(&getSTI())) { 5082 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 5083 return Error(getLoc(), "invalid HSA metadata"); 5084 } else { 5085 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 5086 return Error(getLoc(), "invalid HSA metadata"); 5087 } 5088 5089 return false; 5090 } 5091 5092 /// Common code to parse out a block of text (typically YAML) between start and 5093 /// end directives. 
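/// Text is accumulated into \p CollectString one statement at a time, with
/// the target's statement separator appended after each; collection stops
/// once \p AssemblerDirectiveEnd has been consumed. Reaching end-of-file
/// before the terminating directive is reported as an error.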
5094 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 5095 const char *AssemblerDirectiveEnd, 5096 std::string &CollectString) { 5097 5098 raw_string_ostream CollectStream(CollectString); 5099 5100 getLexer().setSkipSpace(false); 5101 5102 bool FoundEnd = false; 5103 while (!isToken(AsmToken::Eof)) { 5104 while (isToken(AsmToken::Space)) { 5105 CollectStream << getTokenStr(); 5106 Lex(); 5107 } 5108 5109 if (trySkipId(AssemblerDirectiveEnd)) { 5110 FoundEnd = true; 5111 break; 5112 } 5113 5114 CollectStream << Parser.parseStringToEndOfStatement() 5115 << getContext().getAsmInfo()->getSeparatorString(); 5116 5117 Parser.eatToEndOfStatement(); 5118 } 5119 5120 getLexer().setSkipSpace(true); 5121 5122 if (isToken(AsmToken::Eof) && !FoundEnd) { 5123 return TokError(Twine("expected directive ") + 5124 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 5125 } 5126 5127 CollectStream.flush(); 5128 return false; 5129 } 5130 5131 /// Parse the assembler directive for new MsgPack-format PAL metadata. 5132 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 5133 std::string String; 5134 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 5135 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 5136 return true; 5137 5138 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5139 if (!PALMetadata->setFromString(String)) 5140 return Error(getLoc(), "invalid PAL metadata"); 5141 return false; 5142 } 5143 5144 /// Parse the assembler directive for old linear-format PAL metadata. 5145 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5146 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5147 return Error(getLoc(), 5148 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5149 "not available on non-amdpal OSes")).str()); 5150 } 5151 5152 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5153 PALMetadata->setLegacy(); 5154 for (;;) { 5155 uint32_t Key, Value; 5156 if (ParseAsAbsoluteExpression(Key)) { 5157 return TokError(Twine("invalid value in ") + 5158 Twine(PALMD::AssemblerDirective)); 5159 } 5160 if (!trySkipToken(AsmToken::Comma)) { 5161 return TokError(Twine("expected an even number of values in ") + 5162 Twine(PALMD::AssemblerDirective)); 5163 } 5164 if (ParseAsAbsoluteExpression(Value)) { 5165 return TokError(Twine("invalid value in ") + 5166 Twine(PALMD::AssemblerDirective)); 5167 } 5168 PALMetadata->setRegister(Key, Value); 5169 if (!trySkipToken(AsmToken::Comma)) 5170 break; 5171 } 5172 return false; 5173 } 5174 5175 /// ParseDirectiveAMDGPULDS 5176 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5177 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5178 if (getParser().checkForValidSection()) 5179 return true; 5180 5181 StringRef Name; 5182 SMLoc NameLoc = getLoc(); 5183 if (getParser().parseIdentifier(Name)) 5184 return TokError("expected identifier in directive"); 5185 5186 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5187 if (parseToken(AsmToken::Comma, "expected ','")) 5188 return true; 5189 5190 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5191 5192 int64_t Size; 5193 SMLoc SizeLoc = getLoc(); 5194 if (getParser().parseAbsoluteExpression(Size)) 5195 return true; 5196 if (Size < 0) 5197 return Error(SizeLoc, "size must be non-negative"); 5198 if (Size > LocalMemorySize) 5199 return Error(SizeLoc, "size is too large"); 5200 5201 int64_t Alignment = 4; 5202 if (trySkipToken(AsmToken::Comma)) { 5203 SMLoc AlignLoc = getLoc(); 5204 if 
(getParser().parseAbsoluteExpression(Alignment)) 5205 return true; 5206 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5207 return Error(AlignLoc, "alignment must be a power of two"); 5208 5209 // Alignment larger than the size of LDS is possible in theory, as long 5210 // as the linker manages to place to symbol at address 0, but we do want 5211 // to make sure the alignment fits nicely into a 32-bit integer. 5212 if (Alignment >= 1u << 31) 5213 return Error(AlignLoc, "alignment is too large"); 5214 } 5215 5216 if (parseToken(AsmToken::EndOfStatement, 5217 "unexpected token in '.amdgpu_lds' directive")) 5218 return true; 5219 5220 Symbol->redefineIfPossible(); 5221 if (!Symbol->isUndefined()) 5222 return Error(NameLoc, "invalid symbol redefinition"); 5223 5224 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5225 return false; 5226 } 5227 5228 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5229 StringRef IDVal = DirectiveID.getString(); 5230 5231 if (isHsaAbiVersion3Or4(&getSTI())) { 5232 if (IDVal == ".amdhsa_kernel") 5233 return ParseDirectiveAMDHSAKernel(); 5234 5235 // TODO: Restructure/combine with PAL metadata directive. 5236 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5237 return ParseDirectiveHSAMetadata(); 5238 } else { 5239 if (IDVal == ".hsa_code_object_version") 5240 return ParseDirectiveHSACodeObjectVersion(); 5241 5242 if (IDVal == ".hsa_code_object_isa") 5243 return ParseDirectiveHSACodeObjectISA(); 5244 5245 if (IDVal == ".amd_kernel_code_t") 5246 return ParseDirectiveAMDKernelCodeT(); 5247 5248 if (IDVal == ".amdgpu_hsa_kernel") 5249 return ParseDirectiveAMDGPUHsaKernel(); 5250 5251 if (IDVal == ".amd_amdgpu_isa") 5252 return ParseDirectiveISAVersion(); 5253 5254 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5255 return ParseDirectiveHSAMetadata(); 5256 } 5257 5258 if (IDVal == ".amdgcn_target") 5259 return ParseDirectiveAMDGCNTarget(); 5260 5261 if (IDVal == ".amdgpu_lds") 5262 return ParseDirectiveAMDGPULDS(); 5263 5264 if (IDVal == PALMD::AssemblerDirectiveBegin) 5265 return ParseDirectivePALMetadataBegin(); 5266 5267 if (IDVal == PALMD::AssemblerDirective) 5268 return ParseDirectivePALMetadata(); 5269 5270 return true; 5271 } 5272 5273 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5274 unsigned RegNo) { 5275 5276 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5277 R.isValid(); ++R) { 5278 if (*R == RegNo) 5279 return isGFX9Plus(); 5280 } 5281 5282 // GFX10 has 2 more SGPRs 104 and 105. 5283 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5284 R.isValid(); ++R) { 5285 if (*R == RegNo) 5286 return hasSGPR104_SGPR105(); 5287 } 5288 5289 switch (RegNo) { 5290 case AMDGPU::SRC_SHARED_BASE: 5291 case AMDGPU::SRC_SHARED_LIMIT: 5292 case AMDGPU::SRC_PRIVATE_BASE: 5293 case AMDGPU::SRC_PRIVATE_LIMIT: 5294 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5295 return isGFX9Plus(); 5296 case AMDGPU::TBA: 5297 case AMDGPU::TBA_LO: 5298 case AMDGPU::TBA_HI: 5299 case AMDGPU::TMA: 5300 case AMDGPU::TMA_LO: 5301 case AMDGPU::TMA_HI: 5302 return !isGFX9Plus(); 5303 case AMDGPU::XNACK_MASK: 5304 case AMDGPU::XNACK_MASK_LO: 5305 case AMDGPU::XNACK_MASK_HI: 5306 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5307 case AMDGPU::SGPR_NULL: 5308 return isGFX10Plus(); 5309 default: 5310 break; 5311 } 5312 5313 if (isCI()) 5314 return true; 5315 5316 if (isSI() || isGFX10Plus()) { 5317 // No flat_scr on SI. 
5318 // On GFX10 flat scratch is not a valid register operand and can only be 5319 // accessed with s_setreg/s_getreg. 5320 switch (RegNo) { 5321 case AMDGPU::FLAT_SCR: 5322 case AMDGPU::FLAT_SCR_LO: 5323 case AMDGPU::FLAT_SCR_HI: 5324 return false; 5325 default: 5326 return true; 5327 } 5328 } 5329 5330 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5331 // SI/CI have. 5332 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5333 R.isValid(); ++R) { 5334 if (*R == RegNo) 5335 return hasSGPR102_SGPR103(); 5336 } 5337 5338 return true; 5339 } 5340 5341 OperandMatchResultTy 5342 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5343 OperandMode Mode) { 5344 // Try to parse with a custom parser 5345 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5346 5347 // If we successfully parsed the operand or if there was an error parsing, 5348 // we are done. 5349 // 5350 // If we are parsing after we reach EndOfStatement then this means we 5351 // are appending default values to the Operands list. This is only done 5352 // by a custom parser, so we shouldn't continue on to the generic parsing. 5353 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5354 isToken(AsmToken::EndOfStatement)) 5355 return ResTy; 5356 5357 SMLoc RBraceLoc; 5358 SMLoc LBraceLoc = getLoc(); 5359 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5360 unsigned Prefix = Operands.size(); 5361 5362 for (;;) { 5363 auto Loc = getLoc(); 5364 ResTy = parseReg(Operands); 5365 if (ResTy == MatchOperand_NoMatch) 5366 Error(Loc, "expected a register"); 5367 if (ResTy != MatchOperand_Success) 5368 return MatchOperand_ParseFail; 5369 5370 RBraceLoc = getLoc(); 5371 if (trySkipToken(AsmToken::RBrac)) 5372 break; 5373 5374 if (!skipToken(AsmToken::Comma, 5375 "expected a comma or a closing square bracket")) { 5376 return MatchOperand_ParseFail; 5377 } 5378 } 5379 5380 if (Operands.size() - Prefix > 1) { 5381 Operands.insert(Operands.begin() + Prefix, 5382 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5383 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5384 } 5385 5386 return MatchOperand_Success; 5387 } 5388 5389 return parseRegOrImm(Operands); 5390 } 5391 5392 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5393 // Clear any forced encodings from the previous instruction.
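// For example, a "_e64" suffix forces the 64-bit (VOP3) encoding and "_sdwa" forces the SDWA encoding; the matched suffix is stripped from the returned mnemonic.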
5394 setForcedEncodingSize(0); 5395 setForcedDPP(false); 5396 setForcedSDWA(false); 5397 5398 if (Name.endswith("_e64")) { 5399 setForcedEncodingSize(64); 5400 return Name.substr(0, Name.size() - 4); 5401 } else if (Name.endswith("_e32")) { 5402 setForcedEncodingSize(32); 5403 return Name.substr(0, Name.size() - 4); 5404 } else if (Name.endswith("_dpp")) { 5405 setForcedDPP(true); 5406 return Name.substr(0, Name.size() - 4); 5407 } else if (Name.endswith("_sdwa")) { 5408 setForcedSDWA(true); 5409 return Name.substr(0, Name.size() - 5); 5410 } 5411 return Name; 5412 } 5413 5414 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5415 StringRef Name, 5416 SMLoc NameLoc, OperandVector &Operands) { 5417 // Add the instruction mnemonic 5418 Name = parseMnemonicSuffix(Name); 5419 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5420 5421 bool IsMIMG = Name.startswith("image_"); 5422 5423 while (!trySkipToken(AsmToken::EndOfStatement)) { 5424 OperandMode Mode = OperandMode_Default; 5425 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5426 Mode = OperandMode_NSA; 5427 CPolSeen = 0; 5428 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5429 5430 if (Res != MatchOperand_Success) { 5431 checkUnsupportedInstruction(Name, NameLoc); 5432 if (!Parser.hasPendingError()) { 5433 // FIXME: use real operand location rather than the current location. 5434 StringRef Msg = 5435 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5436 "not a valid operand."; 5437 Error(getLoc(), Msg); 5438 } 5439 while (!trySkipToken(AsmToken::EndOfStatement)) { 5440 lex(); 5441 } 5442 return true; 5443 } 5444 5445 // Eat the comma or space if there is one. 5446 trySkipToken(AsmToken::Comma); 5447 } 5448 5449 return false; 5450 } 5451 5452 //===----------------------------------------------------------------------===// 5453 // Utility functions 5454 //===----------------------------------------------------------------------===// 5455 5456 OperandMatchResultTy 5457 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5458 5459 if (!trySkipId(Prefix, AsmToken::Colon)) 5460 return MatchOperand_NoMatch; 5461 5462 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5463 } 5464 5465 OperandMatchResultTy 5466 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5467 AMDGPUOperand::ImmTy ImmTy, 5468 bool (*ConvertResult)(int64_t&)) { 5469 SMLoc S = getLoc(); 5470 int64_t Value = 0; 5471 5472 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5473 if (Res != MatchOperand_Success) 5474 return Res; 5475 5476 if (ConvertResult && !ConvertResult(Value)) { 5477 Error(S, "invalid " + StringRef(Prefix) + " value."); 5478 } 5479 5480 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5481 return MatchOperand_Success; 5482 } 5483 5484 OperandMatchResultTy 5485 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5486 OperandVector &Operands, 5487 AMDGPUOperand::ImmTy ImmTy, 5488 bool (*ConvertResult)(int64_t&)) { 5489 SMLoc S = getLoc(); 5490 if (!trySkipId(Prefix, AsmToken::Colon)) 5491 return MatchOperand_NoMatch; 5492 5493 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5494 return MatchOperand_ParseFail; 5495 5496 unsigned Val = 0; 5497 const unsigned MaxSize = 4; 5498 5499 // FIXME: How to verify the number of elements matches the number of src 5500 // operands? 
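// Illustrative syntax for such an array operand (e.g. op_sel): "op_sel:[0,1,0,1]". Each element must be 0 or 1 and is packed into bit I of the resulting immediate; at most MaxSize elements are accepted.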
5501 for (int I = 0; ; ++I) { 5502 int64_t Op; 5503 SMLoc Loc = getLoc(); 5504 if (!parseExpr(Op)) 5505 return MatchOperand_ParseFail; 5506 5507 if (Op != 0 && Op != 1) { 5508 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5509 return MatchOperand_ParseFail; 5510 } 5511 5512 Val |= (Op << I); 5513 5514 if (trySkipToken(AsmToken::RBrac)) 5515 break; 5516 5517 if (I + 1 == MaxSize) { 5518 Error(getLoc(), "expected a closing square bracket"); 5519 return MatchOperand_ParseFail; 5520 } 5521 5522 if (!skipToken(AsmToken::Comma, "expected a comma")) 5523 return MatchOperand_ParseFail; 5524 } 5525 5526 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5527 return MatchOperand_Success; 5528 } 5529 5530 OperandMatchResultTy 5531 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5532 AMDGPUOperand::ImmTy ImmTy) { 5533 int64_t Bit; 5534 SMLoc S = getLoc(); 5535 5536 if (trySkipId(Name)) { 5537 Bit = 1; 5538 } else if (trySkipId("no", Name)) { 5539 Bit = 0; 5540 } else { 5541 return MatchOperand_NoMatch; 5542 } 5543 5544 if (Name == "r128" && !hasMIMG_R128()) { 5545 Error(S, "r128 modifier is not supported on this GPU"); 5546 return MatchOperand_ParseFail; 5547 } 5548 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5549 Error(S, "a16 modifier is not supported on this GPU"); 5550 return MatchOperand_ParseFail; 5551 } 5552 5553 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5554 ImmTy = AMDGPUOperand::ImmTyR128A16; 5555 5556 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5557 return MatchOperand_Success; 5558 } 5559 5560 OperandMatchResultTy 5561 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5562 unsigned CPolOn = 0; 5563 unsigned CPolOff = 0; 5564 SMLoc S = getLoc(); 5565 5566 if (trySkipId("glc")) 5567 CPolOn = AMDGPU::CPol::GLC; 5568 else if (trySkipId("noglc")) 5569 CPolOff = AMDGPU::CPol::GLC; 5570 else if (trySkipId("slc")) 5571 CPolOn = AMDGPU::CPol::SLC; 5572 else if (trySkipId("noslc")) 5573 CPolOff = AMDGPU::CPol::SLC; 5574 else if (trySkipId("dlc")) 5575 CPolOn = AMDGPU::CPol::DLC; 5576 else if (trySkipId("nodlc")) 5577 CPolOff = AMDGPU::CPol::DLC; 5578 else if (trySkipId("scc")) 5579 CPolOn = AMDGPU::CPol::SCC; 5580 else if (trySkipId("noscc")) 5581 CPolOff = AMDGPU::CPol::SCC; 5582 else 5583 return MatchOperand_NoMatch; 5584 5585 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5586 Error(S, "dlc modifier is not supported on this GPU"); 5587 return MatchOperand_ParseFail; 5588 } 5589 5590 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5591 Error(S, "scc modifier is not supported on this GPU"); 5592 return MatchOperand_ParseFail; 5593 } 5594 5595 if (CPolSeen & (CPolOn | CPolOff)) { 5596 Error(S, "duplicate cache policy modifier"); 5597 return MatchOperand_ParseFail; 5598 } 5599 5600 CPolSeen |= (CPolOn | CPolOff); 5601 5602 for (unsigned I = 1; I != Operands.size(); ++I) { 5603 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5604 if (Op.isCPol()) { 5605 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5606 return MatchOperand_Success; 5607 } 5608 } 5609 5610 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5611 AMDGPUOperand::ImmTyCPol)); 5612 5613 return MatchOperand_Success; 5614 } 5615 5616 static void addOptionalImmOperand( 5617 MCInst& Inst, const OperandVector& Operands, 5618 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5619 AMDGPUOperand::ImmTy ImmT, 5620 int64_t Default = 0) { 5621 auto i = OptionalIdx.find(ImmT); 5622 if (i != OptionalIdx.end()) { 
5623 unsigned Idx = i->second; 5624 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5625 } else { 5626 Inst.addOperand(MCOperand::createImm(Default)); 5627 } 5628 } 5629 5630 OperandMatchResultTy 5631 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5632 StringRef &Value, 5633 SMLoc &StringLoc) { 5634 if (!trySkipId(Prefix, AsmToken::Colon)) 5635 return MatchOperand_NoMatch; 5636 5637 StringLoc = getLoc(); 5638 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5639 : MatchOperand_ParseFail; 5640 } 5641 5642 //===----------------------------------------------------------------------===// 5643 // MTBUF format 5644 //===----------------------------------------------------------------------===// 5645 5646 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5647 int64_t MaxVal, 5648 int64_t &Fmt) { 5649 int64_t Val; 5650 SMLoc Loc = getLoc(); 5651 5652 auto Res = parseIntWithPrefix(Pref, Val); 5653 if (Res == MatchOperand_ParseFail) 5654 return false; 5655 if (Res == MatchOperand_NoMatch) 5656 return true; 5657 5658 if (Val < 0 || Val > MaxVal) { 5659 Error(Loc, Twine("out of range ", StringRef(Pref))); 5660 return false; 5661 } 5662 5663 Fmt = Val; 5664 return true; 5665 } 5666 5667 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5668 // values to live in a joint format operand in the MCInst encoding. 5669 OperandMatchResultTy 5670 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5671 using namespace llvm::AMDGPU::MTBUFFormat; 5672 5673 int64_t Dfmt = DFMT_UNDEF; 5674 int64_t Nfmt = NFMT_UNDEF; 5675 5676 // dfmt and nfmt can appear in either order, and each is optional. 5677 for (int I = 0; I < 2; ++I) { 5678 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5679 return MatchOperand_ParseFail; 5680 5681 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5682 return MatchOperand_ParseFail; 5683 } 5684 // Skip optional comma between dfmt/nfmt 5685 // but guard against 2 commas following each other. 5686 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5687 !peekToken().is(AsmToken::Comma)) { 5688 trySkipToken(AsmToken::Comma); 5689 } 5690 } 5691 5692 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5693 return MatchOperand_NoMatch; 5694 5695 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5696 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5697 5698 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5699 return MatchOperand_Success; 5700 } 5701 5702 OperandMatchResultTy 5703 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5704 using namespace llvm::AMDGPU::MTBUFFormat; 5705 5706 int64_t Fmt = UFMT_UNDEF; 5707 5708 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5709 return MatchOperand_ParseFail; 5710 5711 if (Fmt == UFMT_UNDEF) 5712 return MatchOperand_NoMatch; 5713 5714 Format = Fmt; 5715 return MatchOperand_Success; 5716 } 5717 5718 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5719 int64_t &Nfmt, 5720 StringRef FormatStr, 5721 SMLoc Loc) { 5722 using namespace llvm::AMDGPU::MTBUFFormat; 5723 int64_t Format; 5724 5725 Format = getDfmt(FormatStr); 5726 if (Format != DFMT_UNDEF) { 5727 Dfmt = Format; 5728 return true; 5729 } 5730 5731 Format = getNfmt(FormatStr, getSTI()); 5732 if (Format != NFMT_UNDEF) { 5733 Nfmt = Format; 5734 return true; 5735 } 5736 5737 Error(Loc, "unsupported format"); 5738 return false; 5739 } 5740 5741 OperandMatchResultTy 5742 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5743 SMLoc FormatLoc, 5744 int64_t &Format) { 5745 using namespace llvm::AMDGPU::MTBUFFormat; 5746 5747 int64_t Dfmt = DFMT_UNDEF; 5748 int64_t Nfmt = NFMT_UNDEF; 5749 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5750 return MatchOperand_ParseFail; 5751 5752 if (trySkipToken(AsmToken::Comma)) { 5753 StringRef Str; 5754 SMLoc Loc = getLoc(); 5755 if (!parseId(Str, "expected a format string") || 5756 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5757 return MatchOperand_ParseFail; 5758 } 5759 if (Dfmt == DFMT_UNDEF) { 5760 Error(Loc, "duplicate numeric format"); 5761 return MatchOperand_ParseFail; 5762 } else if (Nfmt == NFMT_UNDEF) { 5763 Error(Loc, "duplicate data format"); 5764 return MatchOperand_ParseFail; 5765 } 5766 } 5767 5768 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5769 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5770 5771 if (isGFX10Plus()) { 5772 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5773 if (Ufmt == UFMT_UNDEF) { 5774 Error(FormatLoc, "unsupported format"); 5775 return MatchOperand_ParseFail; 5776 } 5777 Format = Ufmt; 5778 } else { 5779 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5780 } 5781 5782 return MatchOperand_Success; 5783 } 5784 5785 OperandMatchResultTy 5786 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5787 SMLoc Loc, 5788 int64_t &Format) { 5789 using namespace llvm::AMDGPU::MTBUFFormat; 5790 5791 auto Id = getUnifiedFormat(FormatStr); 5792 if (Id == UFMT_UNDEF) 5793 return MatchOperand_NoMatch; 5794 5795 if (!isGFX10Plus()) { 5796 Error(Loc, "unified format is not supported on this GPU"); 5797 return MatchOperand_ParseFail; 5798 } 5799 5800 Format = Id; 5801 return MatchOperand_Success; 5802 } 5803 5804 OperandMatchResultTy 5805 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5806 using namespace llvm::AMDGPU::MTBUFFormat; 5807 SMLoc Loc = getLoc(); 5808 5809 if (!parseExpr(Format)) 5810 return MatchOperand_ParseFail; 5811 if (!isValidFormatEncoding(Format, getSTI())) { 5812 Error(Loc, "out of range format"); 5813 return MatchOperand_ParseFail; 5814 } 5815 5816 return MatchOperand_Success; 5817 } 5818 5819 OperandMatchResultTy 5820 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5821 using namespace llvm::AMDGPU::MTBUFFormat; 5822 5823 if (!trySkipId("format", AsmToken::Colon)) 5824 return MatchOperand_NoMatch; 5825 5826 if (trySkipToken(AsmToken::LBrac)) { 5827 StringRef FormatStr; 5828 SMLoc Loc = getLoc(); 5829 if (!parseId(FormatStr, "expected a format string")) 5830 return MatchOperand_ParseFail; 5831 5832 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5833 if (Res == MatchOperand_NoMatch) 5834 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5835 if (Res != MatchOperand_Success) 5836 return Res; 5837 5838 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5839 return MatchOperand_ParseFail; 5840 5841 return MatchOperand_Success; 5842 } 5843 5844 return parseNumericFormat(Format); 5845 } 5846 5847 OperandMatchResultTy 5848 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5849 using namespace llvm::AMDGPU::MTBUFFormat; 5850 5851 int64_t Format = getDefaultFormatEncoding(getSTI()); 5852 OperandMatchResultTy Res; 5853 SMLoc Loc = getLoc(); 5854 5855 // Parse legacy format syntax. 5856 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5857 if (Res == MatchOperand_ParseFail) 5858 return Res; 5859 5860 bool FormatFound = (Res == MatchOperand_Success); 5861 5862 Operands.push_back( 5863 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5864 5865 if (FormatFound) 5866 trySkipToken(AsmToken::Comma); 5867 5868 if (isToken(AsmToken::EndOfStatement)) { 5869 // We are expecting an soffset operand, 5870 // but let matcher handle the error. 5871 return MatchOperand_Success; 5872 } 5873 5874 // Parse soffset. 
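// The format specifier may also follow soffset, e.g. (illustrative) "tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]"; if no format was found above, a second attempt is made after soffset below.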
5875 Res = parseRegOrImm(Operands); 5876 if (Res != MatchOperand_Success) 5877 return Res; 5878 5879 trySkipToken(AsmToken::Comma); 5880 5881 if (!FormatFound) { 5882 Res = parseSymbolicOrNumericFormat(Format); 5883 if (Res == MatchOperand_ParseFail) 5884 return Res; 5885 if (Res == MatchOperand_Success) { 5886 auto Size = Operands.size(); 5887 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5888 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5889 Op.setImm(Format); 5890 } 5891 return MatchOperand_Success; 5892 } 5893 5894 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5895 Error(getLoc(), "duplicate format"); 5896 return MatchOperand_ParseFail; 5897 } 5898 return MatchOperand_Success; 5899 } 5900 5901 //===----------------------------------------------------------------------===// 5902 // ds 5903 //===----------------------------------------------------------------------===// 5904 5905 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5906 const OperandVector &Operands) { 5907 OptionalImmIndexMap OptionalIdx; 5908 5909 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5910 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5911 5912 // Add the register arguments 5913 if (Op.isReg()) { 5914 Op.addRegOperands(Inst, 1); 5915 continue; 5916 } 5917 5918 // Handle optional arguments 5919 OptionalIdx[Op.getImmTy()] = i; 5920 } 5921 5922 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5923 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5924 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5925 5926 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5927 } 5928 5929 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5930 bool IsGdsHardcoded) { 5931 OptionalImmIndexMap OptionalIdx; 5932 5933 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5934 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5935 5936 // Add the register arguments 5937 if (Op.isReg()) { 5938 Op.addRegOperands(Inst, 1); 5939 continue; 5940 } 5941 5942 if (Op.isToken() && Op.getToken() == "gds") { 5943 IsGdsHardcoded = true; 5944 continue; 5945 } 5946 5947 // Handle optional arguments 5948 OptionalIdx[Op.getImmTy()] = i; 5949 } 5950 5951 AMDGPUOperand::ImmTy OffsetType = 5952 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5953 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5954 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5955 AMDGPUOperand::ImmTyOffset; 5956 5957 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5958 5959 if (!IsGdsHardcoded) { 5960 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5961 } 5962 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5963 } 5964 5965 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5966 OptionalImmIndexMap OptionalIdx; 5967 5968 unsigned OperandIdx[4]; 5969 unsigned EnMask = 0; 5970 int SrcIdx = 0; 5971 5972 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5973 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5974 5975 // Add the register arguments 5976 if (Op.isReg()) { 5977 assert(SrcIdx < 4); 5978 OperandIdx[SrcIdx] = Inst.size(); 5979 Op.addRegOperands(Inst, 1); 5980 ++SrcIdx; 5981 continue; 5982 } 5983 5984 if (Op.isOff()) { 5985 assert(SrcIdx < 4); 5986 OperandIdx[SrcIdx] = Inst.size(); 5987 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5988 ++SrcIdx; 5989 continue; 5990 } 5991 5992 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5993 Op.addImmOperands(Inst, 1); 5994 continue; 5995 } 5996 5997 if (Op.isToken() && Op.getToken() == "done") 5998 continue; 5999 6000 // Handle optional arguments 6001 OptionalIdx[Op.getImmTy()] = i; 6002 } 6003 6004 assert(SrcIdx == 4); 6005 6006 bool Compr = false; 6007 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 6008 Compr = true; 6009 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 6010 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 6011 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 6012 } 6013 6014 for (auto i = 0; i < SrcIdx; ++i) { 6015 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 6016 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 6017 } 6018 } 6019 6020 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 6021 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 6022 6023 Inst.addOperand(MCOperand::createImm(EnMask)); 6024 } 6025 6026 //===----------------------------------------------------------------------===// 6027 // s_waitcnt 6028 //===----------------------------------------------------------------------===// 6029 6030 static bool 6031 encodeCnt( 6032 const AMDGPU::IsaVersion ISA, 6033 int64_t &IntVal, 6034 int64_t CntVal, 6035 bool Saturate, 6036 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 6037 unsigned (*decode)(const IsaVersion &Version, unsigned)) 6038 { 6039 bool Failed = false; 6040 6041 IntVal = encode(ISA, IntVal, CntVal); 6042 if (CntVal != decode(ISA, IntVal)) { 6043 if (Saturate) { 6044 IntVal = encode(ISA, IntVal, -1); 6045 } else { 6046 Failed = true; 6047 } 6048 } 6049 return Failed; 6050 } 6051 6052 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 6053 6054 SMLoc CntLoc = getLoc(); 6055 StringRef CntName = getTokenStr(); 6056 6057 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 6058 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 6059 return false; 6060 6061 int64_t CntVal; 6062 SMLoc ValLoc = getLoc(); 6063 if (!parseExpr(CntVal)) 6064 return false; 6065 6066 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6067 6068 bool Failed = true; 6069 bool Sat = CntName.endswith("_sat"); 6070 6071 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 6072 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 6073 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 6074 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 6075 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 6076 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 6077 } else { 6078 Error(CntLoc, "invalid counter name " + CntName); 6079 return false; 6080 } 6081 6082 if (Failed) { 6083 Error(ValLoc, "too large value for " + CntName); 6084 return false; 6085 } 6086 6087 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 6088 return false; 6089 6090 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 6091 if (isToken(AsmToken::EndOfStatement)) { 6092 Error(getLoc(), "expected a counter name"); 6093 return false; 6094 } 6095 } 6096 6097 return true; 6098 } 6099 6100 OperandMatchResultTy 6101 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 6102 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 6103 int64_t Waitcnt = getWaitcntBitMask(ISA); 6104 SMLoc S = getLoc(); 6105 6106 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 6107 while (!isToken(AsmToken::EndOfStatement)) { 6108 if (!parseCnt(Waitcnt)) 6109 return MatchOperand_ParseFail; 6110 } 6111 } else { 6112 if (!parseExpr(Waitcnt)) 6113 return MatchOperand_ParseFail; 6114 } 6115 6116 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 6117 return MatchOperand_Success; 6118 } 6119 6120 bool 6121 AMDGPUOperand::isSWaitCnt() const { 6122 return isImm(); 6123 } 6124 6125 //===----------------------------------------------------------------------===// 6126 // hwreg 6127 //===----------------------------------------------------------------------===// 6128 6129 bool 6130 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 6131 OperandInfoTy &Offset, 6132 
OperandInfoTy &Width) { 6133 using namespace llvm::AMDGPU::Hwreg; 6134 6135 // The register may be specified by name or using a numeric code 6136 HwReg.Loc = getLoc(); 6137 if (isToken(AsmToken::Identifier) && 6138 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6139 HwReg.IsSymbolic = true; 6140 lex(); // skip register name 6141 } else if (!parseExpr(HwReg.Id, "a register name")) { 6142 return false; 6143 } 6144 6145 if (trySkipToken(AsmToken::RParen)) 6146 return true; 6147 6148 // parse optional params 6149 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6150 return false; 6151 6152 Offset.Loc = getLoc(); 6153 if (!parseExpr(Offset.Id)) 6154 return false; 6155 6156 if (!skipToken(AsmToken::Comma, "expected a comma")) 6157 return false; 6158 6159 Width.Loc = getLoc(); 6160 return parseExpr(Width.Id) && 6161 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6162 } 6163 6164 bool 6165 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6166 const OperandInfoTy &Offset, 6167 const OperandInfoTy &Width) { 6168 6169 using namespace llvm::AMDGPU::Hwreg; 6170 6171 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6172 Error(HwReg.Loc, 6173 "specified hardware register is not supported on this GPU"); 6174 return false; 6175 } 6176 if (!isValidHwreg(HwReg.Id)) { 6177 Error(HwReg.Loc, 6178 "invalid code of hardware register: only 6-bit values are legal"); 6179 return false; 6180 } 6181 if (!isValidHwregOffset(Offset.Id)) { 6182 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6183 return false; 6184 } 6185 if (!isValidHwregWidth(Width.Id)) { 6186 Error(Width.Loc, 6187 "invalid bitfield width: only values from 1 to 32 are legal"); 6188 return false; 6189 } 6190 return true; 6191 } 6192 6193 OperandMatchResultTy 6194 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6195 using namespace llvm::AMDGPU::Hwreg; 6196 6197 int64_t ImmVal = 0; 6198 SMLoc Loc = getLoc(); 6199 6200 if (trySkipId("hwreg", AsmToken::LParen)) { 6201 OperandInfoTy HwReg(ID_UNKNOWN_); 6202 OperandInfoTy Offset(OFFSET_DEFAULT_); 6203 OperandInfoTy Width(WIDTH_DEFAULT_); 6204 if (parseHwregBody(HwReg, Offset, Width) && 6205 validateHwreg(HwReg, Offset, Width)) { 6206 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6207 } else { 6208 return MatchOperand_ParseFail; 6209 } 6210 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6211 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6212 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6213 return MatchOperand_ParseFail; 6214 } 6215 } else { 6216 return MatchOperand_ParseFail; 6217 } 6218 6219 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6220 return MatchOperand_Success; 6221 } 6222 6223 bool AMDGPUOperand::isHwreg() const { 6224 return isImmTy(ImmTyHwreg); 6225 } 6226 6227 //===----------------------------------------------------------------------===// 6228 // sendmsg 6229 //===----------------------------------------------------------------------===// 6230 6231 bool 6232 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6233 OperandInfoTy &Op, 6234 OperandInfoTy &Stream) { 6235 using namespace llvm::AMDGPU::SendMsg; 6236 6237 Msg.Loc = getLoc(); 6238 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6239 Msg.IsSymbolic = true; 6240 lex(); // skip message name 6241 } else if (!parseExpr(Msg.Id, "a message name")) { 6242 return false; 6243 } 6244 6245 if (trySkipToken(AsmToken::Comma)) { 6246 Op.IsDefined = true; 
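// E.g. in "sendmsg(MSG_GS, GS_OP_EMIT, 0)" the operation is GS_OP_EMIT; it may also be given as a numeric expression.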
6247 Op.Loc = getLoc(); 6248 if (isToken(AsmToken::Identifier) && 6249 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6250 lex(); // skip operation name 6251 } else if (!parseExpr(Op.Id, "an operation name")) { 6252 return false; 6253 } 6254 6255 if (trySkipToken(AsmToken::Comma)) { 6256 Stream.IsDefined = true; 6257 Stream.Loc = getLoc(); 6258 if (!parseExpr(Stream.Id)) 6259 return false; 6260 } 6261 } 6262 6263 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6264 } 6265 6266 bool 6267 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6268 const OperandInfoTy &Op, 6269 const OperandInfoTy &Stream) { 6270 using namespace llvm::AMDGPU::SendMsg; 6271 6272 // Validation strictness depends on whether the message is specified 6273 // in a symbolic or in a numeric form. In the latter case, 6274 // only the encoding possibility is checked. 6275 bool Strict = Msg.IsSymbolic; 6276 6277 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6278 Error(Msg.Loc, "invalid message id"); 6279 return false; 6280 } 6281 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6282 if (Op.IsDefined) { 6283 Error(Op.Loc, "message does not support operations"); 6284 } else { 6285 Error(Msg.Loc, "missing message operation"); 6286 } 6287 return false; 6288 } 6289 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6290 Error(Op.Loc, "invalid operation id"); 6291 return false; 6292 } 6293 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6294 Error(Stream.Loc, "message operation does not support streams"); 6295 return false; 6296 } 6297 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6298 Error(Stream.Loc, "invalid message stream id"); 6299 return false; 6300 } 6301 return true; 6302 } 6303 6304 OperandMatchResultTy 6305 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6306 using namespace llvm::AMDGPU::SendMsg; 6307 6308 int64_t ImmVal = 0; 6309 SMLoc Loc = getLoc(); 6310 6311 if (trySkipId("sendmsg", AsmToken::LParen)) { 6312 OperandInfoTy Msg(ID_UNKNOWN_); 6313 OperandInfoTy Op(OP_NONE_); 6314 OperandInfoTy Stream(STREAM_ID_NONE_); 6315 if (parseSendMsgBody(Msg, Op, Stream) && 6316 validateSendMsg(Msg, Op, Stream)) { 6317 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6318 } else { 6319 return MatchOperand_ParseFail; 6320 } 6321 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6322 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6323 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6324 return MatchOperand_ParseFail; 6325 } 6326 } else { 6327 return MatchOperand_ParseFail; 6328 } 6329 6330 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6331 return MatchOperand_Success; 6332 } 6333 6334 bool AMDGPUOperand::isSendMsg() const { 6335 return isImmTy(ImmTySendMsg); 6336 } 6337 6338 //===----------------------------------------------------------------------===// 6339 // v_interp 6340 //===----------------------------------------------------------------------===// 6341 6342 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6343 StringRef Str; 6344 SMLoc S = getLoc(); 6345 6346 if (!parseId(Str)) 6347 return MatchOperand_NoMatch; 6348 6349 int Slot = StringSwitch<int>(Str) 6350 .Case("p10", 0) 6351 .Case("p20", 1) 6352 .Case("p0", 2) 6353 .Default(-1); 6354 6355 if (Slot == -1) { 6356 Error(S, "invalid interpolation slot"); 6357 return MatchOperand_ParseFail; 6358 } 6359 6360 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6361
AMDGPUOperand::ImmTyInterpSlot)); 6362 return MatchOperand_Success; 6363 } 6364 6365 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6366 StringRef Str; 6367 SMLoc S = getLoc(); 6368 6369 if (!parseId(Str)) 6370 return MatchOperand_NoMatch; 6371 6372 if (!Str.startswith("attr")) { 6373 Error(S, "invalid interpolation attribute"); 6374 return MatchOperand_ParseFail; 6375 } 6376 6377 StringRef Chan = Str.take_back(2); 6378 int AttrChan = StringSwitch<int>(Chan) 6379 .Case(".x", 0) 6380 .Case(".y", 1) 6381 .Case(".z", 2) 6382 .Case(".w", 3) 6383 .Default(-1); 6384 if (AttrChan == -1) { 6385 Error(S, "invalid or missing interpolation attribute channel"); 6386 return MatchOperand_ParseFail; 6387 } 6388 6389 Str = Str.drop_back(2).drop_front(4); 6390 6391 uint8_t Attr; 6392 if (Str.getAsInteger(10, Attr)) { 6393 Error(S, "invalid or missing interpolation attribute number"); 6394 return MatchOperand_ParseFail; 6395 } 6396 6397 if (Attr > 63) { 6398 Error(S, "out of bounds interpolation attribute number"); 6399 return MatchOperand_ParseFail; 6400 } 6401 6402 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6403 6404 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6405 AMDGPUOperand::ImmTyInterpAttr)); 6406 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6407 AMDGPUOperand::ImmTyAttrChan)); 6408 return MatchOperand_Success; 6409 } 6410 6411 //===----------------------------------------------------------------------===// 6412 // exp 6413 //===----------------------------------------------------------------------===// 6414 6415 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6416 using namespace llvm::AMDGPU::Exp; 6417 6418 StringRef Str; 6419 SMLoc S = getLoc(); 6420 6421 if (!parseId(Str)) 6422 return MatchOperand_NoMatch; 6423 6424 unsigned Id = getTgtId(Str); 6425 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6426 Error(S, (Id == ET_INVALID) ? 
6427 "invalid exp target" : 6428 "exp target is not supported on this GPU"); 6429 return MatchOperand_ParseFail; 6430 } 6431 6432 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6433 AMDGPUOperand::ImmTyExpTgt)); 6434 return MatchOperand_Success; 6435 } 6436 6437 //===----------------------------------------------------------------------===// 6438 // parser helpers 6439 //===----------------------------------------------------------------------===// 6440 6441 bool 6442 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6443 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6444 } 6445 6446 bool 6447 AMDGPUAsmParser::isId(const StringRef Id) const { 6448 return isId(getToken(), Id); 6449 } 6450 6451 bool 6452 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6453 return getTokenKind() == Kind; 6454 } 6455 6456 bool 6457 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6458 if (isId(Id)) { 6459 lex(); 6460 return true; 6461 } 6462 return false; 6463 } 6464 6465 bool 6466 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6467 if (isToken(AsmToken::Identifier)) { 6468 StringRef Tok = getTokenStr(); 6469 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6470 lex(); 6471 return true; 6472 } 6473 } 6474 return false; 6475 } 6476 6477 bool 6478 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6479 if (isId(Id) && peekToken().is(Kind)) { 6480 lex(); 6481 lex(); 6482 return true; 6483 } 6484 return false; 6485 } 6486 6487 bool 6488 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6489 if (isToken(Kind)) { 6490 lex(); 6491 return true; 6492 } 6493 return false; 6494 } 6495 6496 bool 6497 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6498 const StringRef ErrMsg) { 6499 if (!trySkipToken(Kind)) { 6500 Error(getLoc(), ErrMsg); 6501 return false; 6502 } 6503 return true; 6504 } 6505 6506 bool 6507 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6508 SMLoc S = getLoc(); 6509 6510 const MCExpr *Expr; 6511 if (Parser.parseExpression(Expr)) 6512 return false; 6513 6514 if (Expr->evaluateAsAbsolute(Imm)) 6515 return true; 6516 6517 if (Expected.empty()) { 6518 Error(S, "expected absolute expression"); 6519 } else { 6520 Error(S, Twine("expected ", Expected) + 6521 Twine(" or an absolute expression")); 6522 } 6523 return false; 6524 } 6525 6526 bool 6527 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6528 SMLoc S = getLoc(); 6529 6530 const MCExpr *Expr; 6531 if (Parser.parseExpression(Expr)) 6532 return false; 6533 6534 int64_t IntVal; 6535 if (Expr->evaluateAsAbsolute(IntVal)) { 6536 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6537 } else { 6538 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6539 } 6540 return true; 6541 } 6542 6543 bool 6544 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6545 if (isToken(AsmToken::String)) { 6546 Val = getToken().getStringContents(); 6547 lex(); 6548 return true; 6549 } else { 6550 Error(getLoc(), ErrMsg); 6551 return false; 6552 } 6553 } 6554 6555 bool 6556 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6557 if (isToken(AsmToken::Identifier)) { 6558 Val = getTokenStr(); 6559 lex(); 6560 return true; 6561 } else { 6562 if (!ErrMsg.empty()) 6563 Error(getLoc(), ErrMsg); 6564 return false; 6565 } 6566 } 6567 6568 AsmToken 6569 AMDGPUAsmParser::getToken() const { 6570 return Parser.getTok(); 6571 } 6572 6573 AsmToken 6574 
AMDGPUAsmParser::peekToken() { 6575 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6576 } 6577 6578 void 6579 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6580 auto TokCount = getLexer().peekTokens(Tokens); 6581 6582 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6583 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6584 } 6585 6586 AsmToken::TokenKind 6587 AMDGPUAsmParser::getTokenKind() const { 6588 return getLexer().getKind(); 6589 } 6590 6591 SMLoc 6592 AMDGPUAsmParser::getLoc() const { 6593 return getToken().getLoc(); 6594 } 6595 6596 StringRef 6597 AMDGPUAsmParser::getTokenStr() const { 6598 return getToken().getString(); 6599 } 6600 6601 void 6602 AMDGPUAsmParser::lex() { 6603 Parser.Lex(); 6604 } 6605 6606 SMLoc 6607 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6608 const OperandVector &Operands) const { 6609 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6610 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6611 if (Test(Op)) 6612 return Op.getStartLoc(); 6613 } 6614 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6615 } 6616 6617 SMLoc 6618 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6619 const OperandVector &Operands) const { 6620 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6621 return getOperandLoc(Test, Operands); 6622 } 6623 6624 SMLoc 6625 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6626 const OperandVector &Operands) const { 6627 auto Test = [=](const AMDGPUOperand& Op) { 6628 return Op.isRegKind() && Op.getReg() == Reg; 6629 }; 6630 return getOperandLoc(Test, Operands); 6631 } 6632 6633 SMLoc 6634 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6635 auto Test = [](const AMDGPUOperand& Op) { 6636 return Op.IsImmKindLiteral() || Op.isExpr(); 6637 }; 6638 return getOperandLoc(Test, Operands); 6639 } 6640 6641 SMLoc 6642 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6643 auto Test = [](const AMDGPUOperand& Op) { 6644 return Op.isImmKindConst(); 6645 }; 6646 return getOperandLoc(Test, Operands); 6647 } 6648 6649 //===----------------------------------------------------------------------===// 6650 // swizzle 6651 //===----------------------------------------------------------------------===// 6652 6653 LLVM_READNONE 6654 static unsigned 6655 encodeBitmaskPerm(const unsigned AndMask, 6656 const unsigned OrMask, 6657 const unsigned XorMask) { 6658 using namespace llvm::AMDGPU::Swizzle; 6659 6660 return BITMASK_PERM_ENC | 6661 (AndMask << BITMASK_AND_SHIFT) | 6662 (OrMask << BITMASK_OR_SHIFT) | 6663 (XorMask << BITMASK_XOR_SHIFT); 6664 } 6665 6666 bool 6667 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6668 const unsigned MinVal, 6669 const unsigned MaxVal, 6670 const StringRef ErrMsg, 6671 SMLoc &Loc) { 6672 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6673 return false; 6674 } 6675 Loc = getLoc(); 6676 if (!parseExpr(Op)) { 6677 return false; 6678 } 6679 if (Op < MinVal || Op > MaxVal) { 6680 Error(Loc, ErrMsg); 6681 return false; 6682 } 6683 6684 return true; 6685 } 6686 6687 bool 6688 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6689 const unsigned MinVal, 6690 const unsigned MaxVal, 6691 const StringRef ErrMsg) { 6692 SMLoc Loc; 6693 for (unsigned i = 0; i < OpNum; ++i) { 6694 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6695 return false; 6696 } 6697 6698 return true; 6699 } 6700 6701 bool 6702 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6703 using namespace llvm::AMDGPU::Swizzle; 6704 6705 int64_t Lane[LANE_NUM]; 6706 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6707 "expected a 2-bit lane id")) { 6708 Imm = QUAD_PERM_ENC; 6709 for (unsigned I = 0; I < LANE_NUM; ++I) { 6710 Imm |= Lane[I] << (LANE_SHIFT * I); 6711 } 6712 return true; 6713 } 6714 return false; 6715 } 6716 6717 bool 6718 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6719 using namespace llvm::AMDGPU::Swizzle; 6720 6721 SMLoc Loc; 6722 int64_t GroupSize; 6723 int64_t LaneIdx; 6724 6725 if (!parseSwizzleOperand(GroupSize, 6726 2, 32, 6727 "group size must be in the interval [2,32]", 6728 Loc)) { 6729 return false; 6730 } 6731 if (!isPowerOf2_64(GroupSize)) { 6732 Error(Loc, "group size must be a power of two"); 6733 return false; 6734 } 6735 if (parseSwizzleOperand(LaneIdx, 6736 0, GroupSize - 1, 6737 "lane id must be in the interval [0,group size - 1]", 6738 Loc)) { 6739 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6740 return true; 6741 } 6742 return false; 6743 } 6744 6745 bool 6746 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6747 using namespace llvm::AMDGPU::Swizzle; 6748 6749 SMLoc Loc; 6750 int64_t GroupSize; 6751 6752 if (!parseSwizzleOperand(GroupSize, 6753 2, 32, 6754 "group size must be in the interval [2,32]", 6755 Loc)) { 6756 return false; 6757 } 6758 if (!isPowerOf2_64(GroupSize)) { 6759 Error(Loc, "group size must be a power of two"); 6760 return false; 6761 } 6762 6763 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6764 return true; 6765 } 6766 6767 bool 6768 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6769 using namespace llvm::AMDGPU::Swizzle; 6770 6771 SMLoc Loc; 6772 int64_t GroupSize; 6773 6774 if (!parseSwizzleOperand(GroupSize, 6775 1, 16, 6776 "group size must be in the interval [1,16]", 6777 Loc)) { 6778 return false; 6779 } 6780 if (!isPowerOf2_64(GroupSize)) { 6781 Error(Loc, "group size must be a power of two"); 6782 return false; 6783 } 6784 6785 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6786 return true; 6787 } 6788 6789 bool 6790 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6791 using namespace llvm::AMDGPU::Swizzle; 6792 6793 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6794 return false; 6795 } 6796 6797 StringRef Ctl; 6798 SMLoc StrLoc = getLoc(); 6799 if (!parseString(Ctl)) { 6800 return false; 6801 } 6802 if (Ctl.size() != BITMASK_WIDTH) { 6803 Error(StrLoc, "expected a 5-character mask"); 6804 return false; 6805 } 6806 6807 unsigned AndMask = 0; 6808 unsigned OrMask = 0; 6809 unsigned XorMask = 0; 6810 6811 for (size_t i = 0; i < Ctl.size(); ++i) { 6812 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6813 switch(Ctl[i]) { 6814 default: 6815 Error(StrLoc, "invalid mask"); 6816 return false; 6817 case '0': 6818 break; 6819 case '1': 6820 OrMask |= Mask; 6821 break; 6822 case 'p': 6823 AndMask |= Mask; 6824 break; 6825 case 'i': 6826 AndMask |= Mask; 6827 XorMask |= Mask; 6828 break; 6829 } 6830 } 6831 6832 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6833 return true; 6834 } 6835 6836 bool 6837 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6838 6839 SMLoc OffsetLoc = getLoc(); 6840 6841 if (!parseExpr(Imm, "a swizzle macro")) { 6842 return false; 6843 } 6844 if (!isUInt<16>(Imm)) { 6845 Error(OffsetLoc, "expected a 16-bit offset"); 6846 return false; 6847 } 6848 return true; 6849 } 6850 6851 bool 6852 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6853 using namespace llvm::AMDGPU::Swizzle; 6854 6855 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6856 6857 SMLoc ModeLoc = getLoc(); 6858 bool Ok = false; 6859 6860 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6861 Ok = parseSwizzleQuadPerm(Imm); 6862 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6863 Ok = parseSwizzleBitmaskPerm(Imm); 6864 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6865 Ok = parseSwizzleBroadcast(Imm); 6866 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6867 Ok = parseSwizzleSwap(Imm); 6868 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6869 Ok = parseSwizzleReverse(Imm); 6870 } else { 6871 Error(ModeLoc, "expected a swizzle mode"); 6872 } 6873 6874 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6875 } 6876 6877 return false; 6878 } 6879 6880 OperandMatchResultTy 6881 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6882 SMLoc S = getLoc(); 6883 int64_t Imm = 0; 6884 6885 if (trySkipId("offset")) { 6886 6887 bool Ok = false; 6888 if (skipToken(AsmToken::Colon, "expected a colon")) { 6889 if (trySkipId("swizzle")) { 6890 Ok = parseSwizzleMacro(Imm); 6891 } else { 6892 Ok = parseSwizzleOffset(Imm); 6893 } 6894 } 6895 6896 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6897 6898 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6899 } else { 6900 // Swizzle "offset" operand is optional. 6901 // If it is omitted, try parsing other optional operands. 6902 return parseOptionalOpr(Operands); 6903 } 6904 } 6905 6906 bool 6907 AMDGPUOperand::isSwizzle() const { 6908 return isImmTy(ImmTySwizzle); 6909 } 6910 6911 //===----------------------------------------------------------------------===// 6912 // VGPR Index Mode 6913 //===----------------------------------------------------------------------===// 6914 6915 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6916 6917 using namespace llvm::AMDGPU::VGPRIndexMode; 6918 6919 if (trySkipToken(AsmToken::RParen)) { 6920 return OFF; 6921 } 6922 6923 int64_t Imm = 0; 6924 6925 while (true) { 6926 unsigned Mode = 0; 6927 SMLoc S = getLoc(); 6928 6929 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6930 if (trySkipId(IdSymbolic[ModeId])) { 6931 Mode = 1 << ModeId; 6932 break; 6933 } 6934 } 6935 6936 if (Mode == 0) { 6937 Error(S, (Imm == 0)? 
6938 "expected a VGPR index mode or a closing parenthesis" : 6939 "expected a VGPR index mode"); 6940 return UNDEF; 6941 } 6942 6943 if (Imm & Mode) { 6944 Error(S, "duplicate VGPR index mode"); 6945 return UNDEF; 6946 } 6947 Imm |= Mode; 6948 6949 if (trySkipToken(AsmToken::RParen)) 6950 break; 6951 if (!skipToken(AsmToken::Comma, 6952 "expected a comma or a closing parenthesis")) 6953 return UNDEF; 6954 } 6955 6956 return Imm; 6957 } 6958 6959 OperandMatchResultTy 6960 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6961 6962 using namespace llvm::AMDGPU::VGPRIndexMode; 6963 6964 int64_t Imm = 0; 6965 SMLoc S = getLoc(); 6966 6967 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6968 Imm = parseGPRIdxMacro(); 6969 if (Imm == UNDEF) 6970 return MatchOperand_ParseFail; 6971 } else { 6972 if (getParser().parseAbsoluteExpression(Imm)) 6973 return MatchOperand_ParseFail; 6974 if (Imm < 0 || !isUInt<4>(Imm)) { 6975 Error(S, "invalid immediate: only 4-bit values are legal"); 6976 return MatchOperand_ParseFail; 6977 } 6978 } 6979 6980 Operands.push_back( 6981 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6982 return MatchOperand_Success; 6983 } 6984 6985 bool AMDGPUOperand::isGPRIdxMode() const { 6986 return isImmTy(ImmTyGprIdxMode); 6987 } 6988 6989 //===----------------------------------------------------------------------===// 6990 // sopp branch targets 6991 //===----------------------------------------------------------------------===// 6992 6993 OperandMatchResultTy 6994 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6995 6996 // Make sure we are not parsing something 6997 // that looks like a label or an expression but is not. 6998 // This will improve error messages. 6999 if (isRegister() || isModifier()) 7000 return MatchOperand_NoMatch; 7001 7002 if (!parseExpr(Operands)) 7003 return MatchOperand_ParseFail; 7004 7005 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 7006 assert(Opr.isImm() || Opr.isExpr()); 7007 SMLoc Loc = Opr.getStartLoc(); 7008 7009 // Currently we do not support arbitrary expressions as branch targets. 7010 // Only labels and absolute expressions are accepted. 
7011 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 7012 Error(Loc, "expected an absolute expression or a label"); 7013 } else if (Opr.isImm() && !Opr.isS16Imm()) { 7014 Error(Loc, "expected a 16-bit signed jump offset"); 7015 } 7016 7017 return MatchOperand_Success; 7018 } 7019 7020 //===----------------------------------------------------------------------===// 7021 // Boolean holding registers 7022 //===----------------------------------------------------------------------===// 7023 7024 OperandMatchResultTy 7025 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 7026 return parseReg(Operands); 7027 } 7028 7029 //===----------------------------------------------------------------------===// 7030 // mubuf 7031 //===----------------------------------------------------------------------===// 7032 7033 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 7034 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 7035 } 7036 7037 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 7038 const OperandVector &Operands, 7039 bool IsAtomic, 7040 bool IsLds) { 7041 bool IsLdsOpcode = IsLds; 7042 bool HasLdsModifier = false; 7043 OptionalImmIndexMap OptionalIdx; 7044 unsigned FirstOperandIdx = 1; 7045 bool IsAtomicReturn = false; 7046 7047 if (IsAtomic) { 7048 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7049 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7050 if (!Op.isCPol()) 7051 continue; 7052 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7053 break; 7054 } 7055 7056 if (!IsAtomicReturn) { 7057 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7058 if (NewOpc != -1) 7059 Inst.setOpcode(NewOpc); 7060 } 7061 7062 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7063 SIInstrFlags::IsAtomicRet; 7064 } 7065 7066 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 7067 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7068 7069 // Add the register arguments 7070 if (Op.isReg()) { 7071 Op.addRegOperands(Inst, 1); 7072 // Insert a tied src for atomic return dst. 7073 // This cannot be postponed as subsequent calls to 7074 // addImmOperands rely on correct number of MC operands. 7075 if (IsAtomicReturn && i == FirstOperandIdx) 7076 Op.addRegOperands(Inst, 1); 7077 continue; 7078 } 7079 7080 // Handle the case where soffset is an immediate 7081 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7082 Op.addImmOperands(Inst, 1); 7083 continue; 7084 } 7085 7086 HasLdsModifier |= Op.isLDS(); 7087 7088 // Handle tokens like 'offen' which are sometimes hard-coded into the 7089 // asm string. There are no MCInst operands for these. 7090 if (Op.isToken()) { 7091 continue; 7092 } 7093 assert(Op.isImm()); 7094 7095 // Handle optional arguments 7096 OptionalIdx[Op.getImmTy()] = i; 7097 } 7098 7099 // This is a workaround for an llvm quirk which may result in an 7100 // incorrect instruction selection. Lds and non-lds versions of 7101 // MUBUF instructions are identical except that lds versions 7102 // have mandatory 'lds' modifier. However this modifier follows 7103 // optional modifiers and llvm asm matcher regards this 'lds' 7104 // modifier as an optional one. As a result, an lds version 7105 // of opcode may be selected even if it has no 'lds' modifier. 7106 if (IsLdsOpcode && !HasLdsModifier) { 7107 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 7108 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
7109 Inst.setOpcode(NoLdsOpcode); 7110 IsLdsOpcode = false; 7111 } 7112 } 7113 7114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7116 7117 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 7118 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7119 } 7120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7121 } 7122 7123 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 7124 OptionalImmIndexMap OptionalIdx; 7125 7126 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7127 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7128 7129 // Add the register arguments 7130 if (Op.isReg()) { 7131 Op.addRegOperands(Inst, 1); 7132 continue; 7133 } 7134 7135 // Handle the case where soffset is an immediate 7136 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7137 Op.addImmOperands(Inst, 1); 7138 continue; 7139 } 7140 7141 // Handle tokens like 'offen' which are sometimes hard-coded into the 7142 // asm string. There are no MCInst operands for these. 7143 if (Op.isToken()) { 7144 continue; 7145 } 7146 assert(Op.isImm()); 7147 7148 // Handle optional arguments 7149 OptionalIdx[Op.getImmTy()] = i; 7150 } 7151 7152 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7153 AMDGPUOperand::ImmTyOffset); 7154 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7155 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7156 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7158 } 7159 7160 //===----------------------------------------------------------------------===// 7161 // mimg 7162 //===----------------------------------------------------------------------===// 7163 7164 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7165 bool IsAtomic) { 7166 unsigned I = 1; 7167 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7168 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7169 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7170 } 7171 7172 if (IsAtomic) { 7173 // Add src, same as dst 7174 assert(Desc.getNumDefs() == 1); 7175 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7176 } 7177 7178 OptionalImmIndexMap OptionalIdx; 7179 7180 for (unsigned E = Operands.size(); I != E; ++I) { 7181 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7182 7183 // Add the register arguments 7184 if (Op.isReg()) { 7185 Op.addRegOperands(Inst, 1); 7186 } else if (Op.isImmModifier()) { 7187 OptionalIdx[Op.getImmTy()] = I; 7188 } else if (!Op.isToken()) { 7189 llvm_unreachable("unexpected operand type"); 7190 } 7191 } 7192 7193 bool IsGFX10Plus = isGFX10Plus(); 7194 7195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7196 if (IsGFX10Plus) 7197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7199 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7200 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7201 if (IsGFX10Plus) 7202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7203 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 
7204 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7205 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7206 if (!IsGFX10Plus) 7207 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7208 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7209 } 7210 7211 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7212 cvtMIMG(Inst, Operands, true); 7213 } 7214 7215 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7216 OptionalImmIndexMap OptionalIdx; 7217 bool IsAtomicReturn = false; 7218 7219 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7220 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7221 if (!Op.isCPol()) 7222 continue; 7223 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7224 break; 7225 } 7226 7227 if (!IsAtomicReturn) { 7228 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7229 if (NewOpc != -1) 7230 Inst.setOpcode(NewOpc); 7231 } 7232 7233 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7234 SIInstrFlags::IsAtomicRet; 7235 7236 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7237 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7238 7239 // Add the register arguments 7240 if (Op.isReg()) { 7241 Op.addRegOperands(Inst, 1); 7242 if (IsAtomicReturn && i == 1) 7243 Op.addRegOperands(Inst, 1); 7244 continue; 7245 } 7246 7247 // Handle the case where soffset is an immediate 7248 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7249 Op.addImmOperands(Inst, 1); 7250 continue; 7251 } 7252 7253 // Handle tokens like 'offen' which are sometimes hard-coded into the 7254 // asm string. There are no MCInst operands for these. 7255 if (Op.isToken()) { 7256 continue; 7257 } 7258 assert(Op.isImm()); 7259 7260 // Handle optional arguments 7261 OptionalIdx[Op.getImmTy()] = i; 7262 } 7263 7264 if ((int)Inst.getNumOperands() <= 7265 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7268 } 7269 7270 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7271 const OperandVector &Operands) { 7272 for (unsigned I = 1; I < Operands.size(); ++I) { 7273 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7274 if (Operand.isReg()) 7275 Operand.addRegOperands(Inst, 1); 7276 } 7277 7278 Inst.addOperand(MCOperand::createImm(1)); // a16 7279 } 7280 7281 //===----------------------------------------------------------------------===// 7282 // smrd 7283 //===----------------------------------------------------------------------===// 7284 7285 bool AMDGPUOperand::isSMRDOffset8() const { 7286 return isImm() && isUInt<8>(getImm()); 7287 } 7288 7289 bool AMDGPUOperand::isSMEMOffset() const { 7290 return isImm(); // Offset range is checked later by validator. 7291 } 7292 7293 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7294 // 32-bit literals are only supported on CI and we only want to use them 7295 // when the offset is > 8-bits. 
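// E.g. an offset of 0x1000 does not fit in 8 bits and requires the 32-bit literal encoding, while an offset of 0x80 still fits the 8-bit form.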
7296 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7297 } 7298 7299 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7300 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7301 } 7302 7303 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7304 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7305 } 7306 7307 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7308 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7309 } 7310 7311 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7312 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7313 } 7314 7315 //===----------------------------------------------------------------------===// 7316 // vop3 7317 //===----------------------------------------------------------------------===// 7318 7319 static bool ConvertOmodMul(int64_t &Mul) { 7320 if (Mul != 1 && Mul != 2 && Mul != 4) 7321 return false; 7322 7323 Mul >>= 1; 7324 return true; 7325 } 7326 7327 static bool ConvertOmodDiv(int64_t &Div) { 7328 if (Div == 1) { 7329 Div = 0; 7330 return true; 7331 } 7332 7333 if (Div == 2) { 7334 Div = 3; 7335 return true; 7336 } 7337 7338 return false; 7339 } 7340 7341 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7342 // This is intentional and ensures compatibility with sp3. 7343 // See bug 35397 for details. 7344 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7345 if (BoundCtrl == 0 || BoundCtrl == 1) { 7346 BoundCtrl = 1; 7347 return true; 7348 } 7349 return false; 7350 } 7351 7352 // Note: the order in this table matches the order of operands in AsmString. 7353 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7354 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7355 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7356 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7357 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7358 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7359 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7360 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7361 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7362 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7363 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7364 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7365 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7366 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7367 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7368 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7369 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7370 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7371 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7372 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7373 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7374 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7375 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7376 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7377 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7378 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7379 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7380 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7381 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7382 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7383 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7384 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7385 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7386 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
7387 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7388 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7389 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7390 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7391 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7392 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7393 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7394 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7395 };
7396
7397 void AMDGPUAsmParser::onBeginOfFile() {
7398 if (!getParser().getStreamer().getTargetStreamer() ||
7399 getSTI().getTargetTriple().getArch() == Triple::r600)
7400 return;
7401
7402 if (!getTargetStreamer().getTargetID())
7403 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7404
7405 if (isHsaAbiVersion3Or4(&getSTI()))
7406 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7407 }
7408
7409 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7410
7411 OperandMatchResultTy res = parseOptionalOpr(Operands);
7412
7413 // This is a hack to enable hardcoded mandatory operands which follow
7414 // optional operands.
7415 //
7416 // The current design assumes that all operands after the first optional
7417 // operand are also optional. However, the implementation of some instructions
7418 // violates this rule (see e.g. flat/global atomics, which have hardcoded 'glc' operands).
7419 //
7420 // To alleviate this problem, we have to (implicitly) parse extra operands
7421 // to make sure the autogenerated parser of custom operands never hits a
7422 // hardcoded mandatory operand.
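// Illustrative sketch (not from the original source; the exact syntax of the
// example instruction is hypothetical):
//   flat_atomic_swap v0, v[1:2], v3 offset:16 glc
// After 'offset:16' is parsed as an optional operand, the hardcoded 'glc'
// still has to be reachable by the matcher. The lookahead loop below keeps
// consuming up to MAX_OPR_LOOKAHEAD further optional operands (skipping
// separating commas) so parsing never stalls in front of such a hardcoded
// mandatory operand.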
7423 7424 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7425 if (res != MatchOperand_Success || 7426 isToken(AsmToken::EndOfStatement)) 7427 break; 7428 7429 trySkipToken(AsmToken::Comma); 7430 res = parseOptionalOpr(Operands); 7431 } 7432 7433 return res; 7434 } 7435 7436 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7437 OperandMatchResultTy res; 7438 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7439 // try to parse any optional operand here 7440 if (Op.IsBit) { 7441 res = parseNamedBit(Op.Name, Operands, Op.Type); 7442 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7443 res = parseOModOperand(Operands); 7444 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7445 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7446 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7447 res = parseSDWASel(Operands, Op.Name, Op.Type); 7448 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7449 res = parseSDWADstUnused(Operands); 7450 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7451 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7452 Op.Type == AMDGPUOperand::ImmTyNegLo || 7453 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7454 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7455 Op.ConvertResult); 7456 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7457 res = parseDim(Operands); 7458 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7459 res = parseCPol(Operands); 7460 } else { 7461 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7462 } 7463 if (res != MatchOperand_NoMatch) { 7464 return res; 7465 } 7466 } 7467 return MatchOperand_NoMatch; 7468 } 7469 7470 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7471 StringRef Name = getTokenStr(); 7472 if (Name == "mul") { 7473 return parseIntWithPrefix("mul", Operands, 7474 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7475 } 7476 7477 if (Name == "div") { 7478 return parseIntWithPrefix("div", Operands, 7479 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7480 } 7481 7482 return MatchOperand_NoMatch; 7483 } 7484 7485 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7486 cvtVOP3P(Inst, Operands); 7487 7488 int Opc = Inst.getOpcode(); 7489 7490 int SrcNum; 7491 const int Ops[] = { AMDGPU::OpName::src0, 7492 AMDGPU::OpName::src1, 7493 AMDGPU::OpName::src2 }; 7494 for (SrcNum = 0; 7495 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7496 ++SrcNum); 7497 assert(SrcNum > 0); 7498 7499 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7500 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7501 7502 if ((OpSel & (1 << SrcNum)) != 0) { 7503 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7504 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7505 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7506 } 7507 } 7508 7509 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7510 // 1. This operand is input modifiers 7511 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7512 // 2. This is not last operand 7513 && Desc.NumOperands > (OpNum + 1) 7514 // 3. Next operand is register class 7515 && Desc.OpInfo[OpNum + 1].RegClass != -1 7516 // 4. 
Next register is not tied to any other operand 7517 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7518 } 7519 7520 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7521 { 7522 OptionalImmIndexMap OptionalIdx; 7523 unsigned Opc = Inst.getOpcode(); 7524 7525 unsigned I = 1; 7526 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7527 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7528 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7529 } 7530 7531 for (unsigned E = Operands.size(); I != E; ++I) { 7532 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7533 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7534 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7535 } else if (Op.isInterpSlot() || 7536 Op.isInterpAttr() || 7537 Op.isAttrChan()) { 7538 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7539 } else if (Op.isImmModifier()) { 7540 OptionalIdx[Op.getImmTy()] = I; 7541 } else { 7542 llvm_unreachable("unhandled operand type"); 7543 } 7544 } 7545 7546 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7547 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7548 } 7549 7550 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7551 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7552 } 7553 7554 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7555 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7556 } 7557 } 7558 7559 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7560 OptionalImmIndexMap &OptionalIdx) { 7561 unsigned Opc = Inst.getOpcode(); 7562 7563 unsigned I = 1; 7564 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7565 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7566 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7567 } 7568 7569 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7570 // This instruction has src modifiers 7571 for (unsigned E = Operands.size(); I != E; ++I) { 7572 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7573 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7574 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7575 } else if (Op.isImmModifier()) { 7576 OptionalIdx[Op.getImmTy()] = I; 7577 } else if (Op.isRegOrImm()) { 7578 Op.addRegOrImmOperands(Inst, 1); 7579 } else { 7580 llvm_unreachable("unhandled operand type"); 7581 } 7582 } 7583 } else { 7584 // No src modifiers 7585 for (unsigned E = Operands.size(); I != E; ++I) { 7586 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7587 if (Op.isMod()) { 7588 OptionalIdx[Op.getImmTy()] = I; 7589 } else { 7590 Op.addRegOrImmOperands(Inst, 1); 7591 } 7592 } 7593 } 7594 7595 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7596 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7597 } 7598 7599 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7600 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7601 } 7602 7603 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7604 // it has src2 register operand that is tied to dst operand 7605 // we don't allow modifiers for this operand in assembler so src2_modifiers 7606 // should be 0. 
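// Illustrative sketch (not from the original source): for an input such as
//   v_mac_f32_e64 v0, v1, v2
// the converted MCInst does not yet contain src2; the code below inserts
// src2_modifiers = 0 at the src2_modifiers position and then a copy of the
// dst register (v0) as src2, because src2 is tied to dst and may not carry
// abs/neg modifiers in the assembler.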
7607 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7608 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7609 Opc == AMDGPU::V_MAC_F32_e64_vi || 7610 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7611 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7612 Opc == AMDGPU::V_MAC_F16_e64_vi || 7613 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7614 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7615 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7616 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7617 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7618 auto it = Inst.begin(); 7619 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7620 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7621 ++it; 7622 // Copy the operand to ensure it's not invalidated when Inst grows. 7623 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7624 } 7625 } 7626 7627 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7628 OptionalImmIndexMap OptionalIdx; 7629 cvtVOP3(Inst, Operands, OptionalIdx); 7630 } 7631 7632 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands, 7633 OptionalImmIndexMap &OptIdx) { 7634 const int Opc = Inst.getOpcode(); 7635 const MCInstrDesc &Desc = MII.get(Opc); 7636 7637 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7638 7639 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7640 assert(!IsPacked); 7641 Inst.addOperand(Inst.getOperand(0)); 7642 } 7643 7644 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7645 // instruction, and then figure out where to actually put the modifiers 7646 7647 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7648 if (OpSelIdx != -1) { 7649 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7650 } 7651 7652 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7653 if (OpSelHiIdx != -1) { 7654 int DefaultVal = IsPacked ? 
-1 : 0; 7655 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7656 DefaultVal); 7657 } 7658 7659 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7660 if (NegLoIdx != -1) { 7661 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7662 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7663 } 7664 7665 const int Ops[] = { AMDGPU::OpName::src0, 7666 AMDGPU::OpName::src1, 7667 AMDGPU::OpName::src2 }; 7668 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7669 AMDGPU::OpName::src1_modifiers, 7670 AMDGPU::OpName::src2_modifiers }; 7671 7672 unsigned OpSel = 0; 7673 unsigned OpSelHi = 0; 7674 unsigned NegLo = 0; 7675 unsigned NegHi = 0; 7676 7677 if (OpSelIdx != -1) 7678 OpSel = Inst.getOperand(OpSelIdx).getImm(); 7679 7680 if (OpSelHiIdx != -1) 7681 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7682 7683 if (NegLoIdx != -1) { 7684 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7685 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7686 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7687 } 7688 7689 for (int J = 0; J < 3; ++J) { 7690 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7691 if (OpIdx == -1) 7692 break; 7693 7694 uint32_t ModVal = 0; 7695 7696 if ((OpSel & (1 << J)) != 0) 7697 ModVal |= SISrcMods::OP_SEL_0; 7698 7699 if ((OpSelHi & (1 << J)) != 0) 7700 ModVal |= SISrcMods::OP_SEL_1; 7701 7702 if ((NegLo & (1 << J)) != 0) 7703 ModVal |= SISrcMods::NEG; 7704 7705 if ((NegHi & (1 << J)) != 0) 7706 ModVal |= SISrcMods::NEG_HI; 7707 7708 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7709 7710 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7711 } 7712 } 7713 7714 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) { 7715 OptionalImmIndexMap OptIdx; 7716 cvtVOP3(Inst, Operands, OptIdx); 7717 cvtVOP3P(Inst, Operands, OptIdx); 7718 } 7719 7720 //===----------------------------------------------------------------------===// 7721 // dpp 7722 //===----------------------------------------------------------------------===// 7723 7724 bool AMDGPUOperand::isDPP8() const { 7725 return isImmTy(ImmTyDPP8); 7726 } 7727 7728 bool AMDGPUOperand::isDPPCtrl() const { 7729 using namespace AMDGPU::DPP; 7730 7731 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7732 if (result) { 7733 int64_t Imm = getImm(); 7734 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7735 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7736 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7737 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7738 (Imm == DppCtrl::WAVE_SHL1) || 7739 (Imm == DppCtrl::WAVE_ROL1) || 7740 (Imm == DppCtrl::WAVE_SHR1) || 7741 (Imm == DppCtrl::WAVE_ROR1) || 7742 (Imm == DppCtrl::ROW_MIRROR) || 7743 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7744 (Imm == DppCtrl::BCAST15) || 7745 (Imm == DppCtrl::BCAST31) || 7746 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7747 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7748 } 7749 return false; 7750 } 7751 7752 //===----------------------------------------------------------------------===// 7753 // mAI 7754 //===----------------------------------------------------------------------===// 7755 7756 bool AMDGPUOperand::isBLGP() const { 7757 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7758 } 7759 7760 bool 
AMDGPUOperand::isCBSZ() const { 7761 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7762 } 7763 7764 bool AMDGPUOperand::isABID() const { 7765 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7766 } 7767 7768 bool AMDGPUOperand::isS16Imm() const { 7769 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7770 } 7771 7772 bool AMDGPUOperand::isU16Imm() const { 7773 return isImm() && isUInt<16>(getImm()); 7774 } 7775 7776 //===----------------------------------------------------------------------===// 7777 // dim 7778 //===----------------------------------------------------------------------===// 7779 7780 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7781 // We want to allow "dim:1D" etc., 7782 // but the initial 1 is tokenized as an integer. 7783 std::string Token; 7784 if (isToken(AsmToken::Integer)) { 7785 SMLoc Loc = getToken().getEndLoc(); 7786 Token = std::string(getTokenStr()); 7787 lex(); 7788 if (getLoc() != Loc) 7789 return false; 7790 } 7791 7792 StringRef Suffix; 7793 if (!parseId(Suffix)) 7794 return false; 7795 Token += Suffix; 7796 7797 StringRef DimId = Token; 7798 if (DimId.startswith("SQ_RSRC_IMG_")) 7799 DimId = DimId.drop_front(12); 7800 7801 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7802 if (!DimInfo) 7803 return false; 7804 7805 Encoding = DimInfo->Encoding; 7806 return true; 7807 } 7808 7809 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7810 if (!isGFX10Plus()) 7811 return MatchOperand_NoMatch; 7812 7813 SMLoc S = getLoc(); 7814 7815 if (!trySkipId("dim", AsmToken::Colon)) 7816 return MatchOperand_NoMatch; 7817 7818 unsigned Encoding; 7819 SMLoc Loc = getLoc(); 7820 if (!parseDimId(Encoding)) { 7821 Error(Loc, "invalid dim value"); 7822 return MatchOperand_ParseFail; 7823 } 7824 7825 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7826 AMDGPUOperand::ImmTyDim)); 7827 return MatchOperand_Success; 7828 } 7829 7830 //===----------------------------------------------------------------------===// 7831 // dpp 7832 //===----------------------------------------------------------------------===// 7833 7834 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7835 SMLoc S = getLoc(); 7836 7837 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7838 return MatchOperand_NoMatch; 7839 7840 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7841 7842 int64_t Sels[8]; 7843 7844 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7845 return MatchOperand_ParseFail; 7846 7847 for (size_t i = 0; i < 8; ++i) { 7848 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7849 return MatchOperand_ParseFail; 7850 7851 SMLoc Loc = getLoc(); 7852 if (getParser().parseAbsoluteExpression(Sels[i])) 7853 return MatchOperand_ParseFail; 7854 if (0 > Sels[i] || 7 < Sels[i]) { 7855 Error(Loc, "expected a 3-bit value"); 7856 return MatchOperand_ParseFail; 7857 } 7858 } 7859 7860 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7861 return MatchOperand_ParseFail; 7862 7863 unsigned DPP8 = 0; 7864 for (size_t i = 0; i < 8; ++i) 7865 DPP8 |= (Sels[i] << (i * 3)); 7866 7867 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7868 return MatchOperand_Success; 7869 } 7870 7871 bool 7872 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7873 const OperandVector &Operands) { 7874 if (Ctrl == "row_newbcast") 7875 return isGFX90A(); 7876 7877 if (Ctrl == "row_share" || 7878 Ctrl 
== "row_xmask") 7879 return isGFX10Plus(); 7880 7881 if (Ctrl == "wave_shl" || 7882 Ctrl == "wave_shr" || 7883 Ctrl == "wave_rol" || 7884 Ctrl == "wave_ror" || 7885 Ctrl == "row_bcast") 7886 return isVI() || isGFX9(); 7887 7888 return Ctrl == "row_mirror" || 7889 Ctrl == "row_half_mirror" || 7890 Ctrl == "quad_perm" || 7891 Ctrl == "row_shl" || 7892 Ctrl == "row_shr" || 7893 Ctrl == "row_ror"; 7894 } 7895 7896 int64_t 7897 AMDGPUAsmParser::parseDPPCtrlPerm() { 7898 // quad_perm:[%d,%d,%d,%d] 7899 7900 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7901 return -1; 7902 7903 int64_t Val = 0; 7904 for (int i = 0; i < 4; ++i) { 7905 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7906 return -1; 7907 7908 int64_t Temp; 7909 SMLoc Loc = getLoc(); 7910 if (getParser().parseAbsoluteExpression(Temp)) 7911 return -1; 7912 if (Temp < 0 || Temp > 3) { 7913 Error(Loc, "expected a 2-bit value"); 7914 return -1; 7915 } 7916 7917 Val += (Temp << i * 2); 7918 } 7919 7920 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7921 return -1; 7922 7923 return Val; 7924 } 7925 7926 int64_t 7927 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7928 using namespace AMDGPU::DPP; 7929 7930 // sel:%d 7931 7932 int64_t Val; 7933 SMLoc Loc = getLoc(); 7934 7935 if (getParser().parseAbsoluteExpression(Val)) 7936 return -1; 7937 7938 struct DppCtrlCheck { 7939 int64_t Ctrl; 7940 int Lo; 7941 int Hi; 7942 }; 7943 7944 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7945 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7946 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7947 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7948 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7949 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7950 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7951 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7952 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7953 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7954 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7955 .Default({-1, 0, 0}); 7956 7957 bool Valid; 7958 if (Check.Ctrl == -1) { 7959 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7960 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7961 } else { 7962 Valid = Check.Lo <= Val && Val <= Check.Hi; 7963 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7964 } 7965 7966 if (!Valid) { 7967 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7968 return -1; 7969 } 7970 7971 return Val; 7972 } 7973 7974 OperandMatchResultTy 7975 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7976 using namespace AMDGPU::DPP; 7977 7978 if (!isToken(AsmToken::Identifier) || 7979 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7980 return MatchOperand_NoMatch; 7981 7982 SMLoc S = getLoc(); 7983 int64_t Val = -1; 7984 StringRef Ctrl; 7985 7986 parseId(Ctrl); 7987 7988 if (Ctrl == "row_mirror") { 7989 Val = DppCtrl::ROW_MIRROR; 7990 } else if (Ctrl == "row_half_mirror") { 7991 Val = DppCtrl::ROW_HALF_MIRROR; 7992 } else { 7993 if (skipToken(AsmToken::Colon, "expected a colon")) { 7994 if (Ctrl == "quad_perm") { 7995 Val = parseDPPCtrlPerm(); 7996 } else { 7997 Val = parseDPPCtrlSel(Ctrl); 7998 } 7999 } 8000 } 8001 8002 if (Val == -1) 8003 return MatchOperand_ParseFail; 8004 8005 Operands.push_back( 8006 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 8007 return MatchOperand_Success; 8008 } 8009 8010 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 8011 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 8012 } 8013 8014 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 8015 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 8016 } 8017 8018 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 8019 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 8020 } 8021 8022 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 8023 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 8024 } 8025 8026 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 8027 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 8028 } 8029 8030 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 8031 OptionalImmIndexMap OptionalIdx; 8032 8033 unsigned Opc = Inst.getOpcode(); 8034 bool HasModifiers = 8035 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1; 8036 unsigned I = 1; 8037 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8038 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8039 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8040 } 8041 8042 int Fi = 0; 8043 for (unsigned E = Operands.size(); I != E; ++I) { 8044 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 8045 MCOI::TIED_TO); 8046 if (TiedTo != -1) { 8047 assert((unsigned)TiedTo < Inst.getNumOperands()); 8048 // handle tied old or src2 for MAC instructions 8049 Inst.addOperand(Inst.getOperand(TiedTo)); 8050 } 8051 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8052 // Add the register arguments 8053 if (Op.isReg() && validateVccOperand(Op.getReg())) { 8054 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 8055 // Skip it. 
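// Illustrative note (not from the original source): for a VOP2b form such as
//   v_add_u32_dpp v1, vcc, v2, v3 ...
// the 'vcc' register operand is matched by validateVccOperand() and simply
// skipped here, on the assumption that the DPP instruction carries it
// implicitly rather than as an explicit MCInst operand.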
8056 continue; 8057 } 8058 8059 if (IsDPP8) { 8060 if (Op.isDPP8()) { 8061 Op.addImmOperands(Inst, 1); 8062 } else if (HasModifiers && 8063 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8064 Op.addRegWithFPInputModsOperands(Inst, 2); 8065 } else if (Op.isFI()) { 8066 Fi = Op.getImm(); 8067 } else if (Op.isReg()) { 8068 Op.addRegOperands(Inst, 1); 8069 } else { 8070 llvm_unreachable("Invalid operand type"); 8071 } 8072 } else { 8073 if (HasModifiers && 8074 isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8075 Op.addRegWithFPInputModsOperands(Inst, 2); 8076 } else if (Op.isReg()) { 8077 Op.addRegOperands(Inst, 1); 8078 } else if (Op.isDPPCtrl()) { 8079 Op.addImmOperands(Inst, 1); 8080 } else if (Op.isImm()) { 8081 // Handle optional arguments 8082 OptionalIdx[Op.getImmTy()] = I; 8083 } else { 8084 llvm_unreachable("Invalid operand type"); 8085 } 8086 } 8087 } 8088 8089 if (IsDPP8) { 8090 using namespace llvm::AMDGPU::DPP; 8091 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 8092 } else { 8093 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 8094 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 8095 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 8096 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 8097 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 8098 } 8099 } 8100 } 8101 8102 //===----------------------------------------------------------------------===// 8103 // sdwa 8104 //===----------------------------------------------------------------------===// 8105 8106 OperandMatchResultTy 8107 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 8108 AMDGPUOperand::ImmTy Type) { 8109 using namespace llvm::AMDGPU::SDWA; 8110 8111 SMLoc S = getLoc(); 8112 StringRef Value; 8113 OperandMatchResultTy res; 8114 8115 SMLoc StringLoc; 8116 res = parseStringWithPrefix(Prefix, Value, StringLoc); 8117 if (res != MatchOperand_Success) { 8118 return res; 8119 } 8120 8121 int64_t Int; 8122 Int = StringSwitch<int64_t>(Value) 8123 .Case("BYTE_0", SdwaSel::BYTE_0) 8124 .Case("BYTE_1", SdwaSel::BYTE_1) 8125 .Case("BYTE_2", SdwaSel::BYTE_2) 8126 .Case("BYTE_3", SdwaSel::BYTE_3) 8127 .Case("WORD_0", SdwaSel::WORD_0) 8128 .Case("WORD_1", SdwaSel::WORD_1) 8129 .Case("DWORD", SdwaSel::DWORD) 8130 .Default(0xffffffff); 8131 8132 if (Int == 0xffffffff) { 8133 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 8134 return MatchOperand_ParseFail; 8135 } 8136 8137 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 8138 return MatchOperand_Success; 8139 } 8140 8141 OperandMatchResultTy 8142 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 8143 using namespace llvm::AMDGPU::SDWA; 8144 8145 SMLoc S = getLoc(); 8146 StringRef Value; 8147 OperandMatchResultTy res; 8148 8149 SMLoc StringLoc; 8150 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8151 if (res != MatchOperand_Success) { 8152 return res; 8153 } 8154 8155 int64_t Int; 8156 Int = StringSwitch<int64_t>(Value) 8157 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8158 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8159 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8160 .Default(0xffffffff); 8161 8162 if (Int == 0xffffffff) { 8163 Error(StringLoc, "invalid dst_unused value"); 8164 return MatchOperand_ParseFail; 8165 } 8166 8167 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, 
AMDGPUOperand::ImmTySdwaDstUnused)); 8168 return MatchOperand_Success; 8169 } 8170 8171 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8172 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8173 } 8174 8175 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8176 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8177 } 8178 8179 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8180 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8181 } 8182 8183 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8184 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8185 } 8186 8187 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8188 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8189 } 8190 8191 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8192 uint64_t BasicInstType, 8193 bool SkipDstVcc, 8194 bool SkipSrcVcc) { 8195 using namespace llvm::AMDGPU::SDWA; 8196 8197 OptionalImmIndexMap OptionalIdx; 8198 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8199 bool SkippedVcc = false; 8200 8201 unsigned I = 1; 8202 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8203 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8204 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8205 } 8206 8207 for (unsigned E = Operands.size(); I != E; ++I) { 8208 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8209 if (SkipVcc && !SkippedVcc && Op.isReg() && 8210 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8211 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8212 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8213 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8214 // Skip VCC only if we didn't skip it on previous iteration. 8215 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
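// Illustrative mapping (not from the original source) of the operand counts
// checked below, using the examples from the comment above:
//   v_add_i32_sdwa  v1, vcc, v2, v3       -> dst vcc skipped while
//                                            Inst.getNumOperands() == 1
//   v_addc_u32_sdwa v1, vcc, v2, v3, vcc  -> src vcc skipped while
//                                            Inst.getNumOperands() == 5
//                                            (dst + 2 slots each for src0/src1)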
8216 if (BasicInstType == SIInstrFlags::VOP2 && 8217 ((SkipDstVcc && Inst.getNumOperands() == 1) || 8218 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 8219 SkippedVcc = true; 8220 continue; 8221 } else if (BasicInstType == SIInstrFlags::VOPC && 8222 Inst.getNumOperands() == 0) { 8223 SkippedVcc = true; 8224 continue; 8225 } 8226 } 8227 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 8228 Op.addRegOrImmWithInputModsOperands(Inst, 2); 8229 } else if (Op.isImm()) { 8230 // Handle optional arguments 8231 OptionalIdx[Op.getImmTy()] = I; 8232 } else { 8233 llvm_unreachable("Invalid operand type"); 8234 } 8235 SkippedVcc = false; 8236 } 8237 8238 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 8239 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 8240 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 8241 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 8242 switch (BasicInstType) { 8243 case SIInstrFlags::VOP1: 8244 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8245 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8246 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8247 } 8248 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8249 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8250 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8251 break; 8252 8253 case SIInstrFlags::VOP2: 8254 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8255 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 8256 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 8257 } 8258 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 8259 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 8260 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8261 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8262 break; 8263 8264 case SIInstrFlags::VOPC: 8265 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 8266 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 8267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 8268 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 8269 break; 8270 8271 default: 8272 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed");
8273 }
8274 }
8275
8276 // Special case v_mac_{f16, f32}:
8277 // it has a src2 register operand that is tied to the dst operand.
8278 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8279 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8280 auto it = Inst.begin();
8281 std::advance(
8282 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8283 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8284 }
8285 }
8286
8287 //===----------------------------------------------------------------------===//
8288 // mAI
8289 //===----------------------------------------------------------------------===//
8290
8291 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8292 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8293 }
8294
8295 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8296 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8297 }
8298
8299 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8300 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8301 }
8302
8303 /// Force static initialization.
8304 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8305 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8306 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8307 }
8308
8309 #define GET_REGISTER_MATCHER
8310 #define GET_MATCHER_IMPLEMENTATION
8311 #define GET_MNEMONIC_SPELL_CHECKER
8312 #define GET_MNEMONIC_CHECKER
8313 #include "AMDGPUGenAsmMatcher.inc"
8314
8315 // This function should be defined after the auto-generated include so that
8316 // we have the MatchClassKind enum defined.
8317 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8318 unsigned Kind) {
8319 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8320 // But MatchInstructionImpl() expects to see a token and fails to validate the
8321 // operand. This method checks whether we were given an immediate operand but
8322 // the corresponding token is expected (see the illustrative note at the end of this file).
8323 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8324 switch (Kind) {
8325 case MCK_addr64:
8326 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8327 case MCK_gds:
8328 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8329 case MCK_lds:
8330 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8331 case MCK_idxen:
8332 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8333 case MCK_offen:
8334 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8335 case MCK_SSrcB32:
8336 // When operands have expression values, they will return true for isToken,
8337 // because it is not possible to distinguish between a token and an
8338 // expression at parse time. MatchInstructionImpl() will always try to
8339 // match an operand as a token, when isToken returns true, and when the
8340 // name of the expression is not a valid token, the match will fail,
8341 // so we need to handle it here.
8342 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8343 case MCK_SSrcF32:
8344 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8345 case MCK_SoppBrTarget:
8346 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8347 case MCK_VReg32OrOff:
8348 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8349 case MCK_InterpSlot:
8350 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8351 case MCK_Attr: 8352 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8353 case MCK_AttrChan: 8354 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8355 case MCK_ImmSMEMOffset: 8356 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8357 case MCK_SReg_64: 8358 case MCK_SReg_64_XEXEC: 8359 // Null is defined as a 32-bit register but 8360 // it should also be enabled with 64-bit operands. 8361 // The following code enables it for SReg_64 operands 8362 // used as source and destination. Remaining source 8363 // operands are handled in isInlinableImm. 8364 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8365 default: 8366 return Match_InvalidOperand; 8367 } 8368 } 8369 8370 //===----------------------------------------------------------------------===// 8371 // endpgm 8372 //===----------------------------------------------------------------------===// 8373 8374 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8375 SMLoc S = getLoc(); 8376 int64_t Imm = 0; 8377 8378 if (!parseExpr(Imm)) { 8379 // The operand is optional, if not present default to 0 8380 Imm = 0; 8381 } 8382 8383 if (!isUInt<16>(Imm)) { 8384 Error(S, "expected a 16-bit value"); 8385 return MatchOperand_ParseFail; 8386 } 8387 8388 Operands.push_back( 8389 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8390 return MatchOperand_Success; 8391 } 8392 8393 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8394
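// Illustrative note (not from the original source): validateTargetOperandClass
// above compensates for operands such as 'offen' or 'gds' being parsed as
// immediates (ImmTyOffen, ImmTyGDS, ...) rather than tokens. For an input like
//   buffer_load_dword v0, v1, s[4:7], s0 offen
// the trailing 'offen' becomes an AMDGPUOperand immediate; when the matcher
// asks for the MCK_offen token class, the hook accepts it via isOffen().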