1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
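    // For example, a trailing 'gds' modifier on a DS instruction may first be
    // parsed as a reference to a symbol named 'gds'; treating the symbol name
    // as the token lets the instruction matcher still recognize the modifier.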
204 return isSymbolRefExpr(); 205 } 206 207 bool isSymbolRefExpr() const { 208 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 209 } 210 211 bool isImm() const override { 212 return Kind == Immediate; 213 } 214 215 void setImmKindNone() const { 216 assert(isImm()); 217 Imm.Kind = ImmKindTyNone; 218 } 219 220 void setImmKindLiteral() const { 221 assert(isImm()); 222 Imm.Kind = ImmKindTyLiteral; 223 } 224 225 void setImmKindConst() const { 226 assert(isImm()); 227 Imm.Kind = ImmKindTyConst; 228 } 229 230 bool IsImmKindLiteral() const { 231 return isImm() && Imm.Kind == ImmKindTyLiteral; 232 } 233 234 bool isImmKindConst() const { 235 return isImm() && Imm.Kind == ImmKindTyConst; 236 } 237 238 bool isInlinableImm(MVT type) const; 239 bool isLiteralImm(MVT type) const; 240 241 bool isRegKind() const { 242 return Kind == Register; 243 } 244 245 bool isReg() const override { 246 return isRegKind() && !hasModifiers(); 247 } 248 249 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 250 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 251 } 252 253 bool isRegOrImmWithInt16InputMods() const { 254 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 255 } 256 257 bool isRegOrImmWithInt32InputMods() const { 258 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 259 } 260 261 bool isRegOrImmWithInt64InputMods() const { 262 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 263 } 264 265 bool isRegOrImmWithFP16InputMods() const { 266 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 267 } 268 269 bool isRegOrImmWithFP32InputMods() const { 270 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 271 } 272 273 bool isRegOrImmWithFP64InputMods() const { 274 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 275 } 276 277 bool isVReg() const { 278 return isRegClass(AMDGPU::VGPR_32RegClassID) || 279 isRegClass(AMDGPU::VReg_64RegClassID) || 280 isRegClass(AMDGPU::VReg_96RegClassID) || 281 isRegClass(AMDGPU::VReg_128RegClassID) || 282 isRegClass(AMDGPU::VReg_160RegClassID) || 283 isRegClass(AMDGPU::VReg_192RegClassID) || 284 isRegClass(AMDGPU::VReg_256RegClassID) || 285 isRegClass(AMDGPU::VReg_512RegClassID) || 286 isRegClass(AMDGPU::VReg_1024RegClassID); 287 } 288 289 bool isVReg32() const { 290 return isRegClass(AMDGPU::VGPR_32RegClassID); 291 } 292 293 bool isVReg32OrOff() const { 294 return isOff() || isVReg32(); 295 } 296 297 bool isNull() const { 298 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 299 } 300 301 bool isVRegWithInputMods() const; 302 303 bool isSDWAOperand(MVT type) const; 304 bool isSDWAFP16Operand() const; 305 bool isSDWAFP32Operand() const; 306 bool isSDWAInt16Operand() const; 307 bool isSDWAInt32Operand() const; 308 309 bool isImmTy(ImmTy ImmT) const { 310 return isImm() && Imm.Type == ImmT; 311 } 312 313 bool isImmModifier() const { 314 return isImm() && Imm.Type != ImmTyNone; 315 } 316 317 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 318 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 319 bool isDMask() const { return isImmTy(ImmTyDMask); } 320 bool isDim() const { return isImmTy(ImmTyDim); } 321 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 322 bool isDA() const { return isImmTy(ImmTyDA); } 323 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 324 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 325 bool isLWE() const { return isImmTy(ImmTyLWE); } 326 bool isOff() const { return isImmTy(ImmTyOff); } 327 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 328 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 329 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 330 bool isOffen() const { return isImmTy(ImmTyOffen); } 331 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 332 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 333 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 334 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 335 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 336 337 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 338 bool isGDS() const { return isImmTy(ImmTyGDS); } 339 bool isLDS() const { return isImmTy(ImmTyLDS); } 340 bool isCPol() const { return isImmTy(ImmTyCPol); } 341 bool isSWZ() const { return isImmTy(ImmTySWZ); } 342 bool isTFE() const { return isImmTy(ImmTyTFE); } 343 bool isD16() const { return isImmTy(ImmTyD16); } 344 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 345 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 346 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 347 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 348 bool isFI() const { return isImmTy(ImmTyDppFi); } 349 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 350 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 351 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 352 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 353 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 354 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 355 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 356 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 357 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 358 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 359 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 360 bool isHigh() const { return isImmTy(ImmTyHigh); } 361 362 bool isMod() const { 363 return isClampSI() || isOModSI(); 364 } 365 366 bool isRegOrImm() const { 367 return isReg() || isImm(); 368 } 369 370 bool isRegClass(unsigned RCID) const; 371 372 bool isInlineValue() const; 373 374 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 375 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 376 } 377 378 bool isSCSrcB16() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 380 } 381 382 bool isSCSrcV2B16() const { 383 return isSCSrcB16(); 384 } 385 386 bool isSCSrcB32() const { 387 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 388 } 389 390 bool isSCSrcB64() const { 391 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 392 } 393 394 bool isBoolReg() const; 395 396 bool isSCSrcF16() const { 397 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 398 } 399 400 bool isSCSrcV2F16() const { 401 return isSCSrcF16(); 402 } 403 404 bool isSCSrcF32() const { 405 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 406 } 407 408 bool isSCSrcF64() const { 409 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 410 } 411 412 bool isSSrcB32() const { 413 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 414 } 415 416 bool isSSrcB16() const { 417 return isSCSrcB16() || isLiteralImm(MVT::i16); 418 } 419 420 bool isSSrcV2B16() const { 421 llvm_unreachable("cannot 
happen"); 422 return isSSrcB16(); 423 } 424 425 bool isSSrcB64() const { 426 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 427 // See isVSrc64(). 428 return isSCSrcB64() || isLiteralImm(MVT::i64); 429 } 430 431 bool isSSrcF32() const { 432 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 433 } 434 435 bool isSSrcF64() const { 436 return isSCSrcB64() || isLiteralImm(MVT::f64); 437 } 438 439 bool isSSrcF16() const { 440 return isSCSrcB16() || isLiteralImm(MVT::f16); 441 } 442 443 bool isSSrcV2F16() const { 444 llvm_unreachable("cannot happen"); 445 return isSSrcF16(); 446 } 447 448 bool isSSrcV2FP32() const { 449 llvm_unreachable("cannot happen"); 450 return isSSrcF32(); 451 } 452 453 bool isSCSrcV2FP32() const { 454 llvm_unreachable("cannot happen"); 455 return isSCSrcF32(); 456 } 457 458 bool isSSrcV2INT32() const { 459 llvm_unreachable("cannot happen"); 460 return isSSrcB32(); 461 } 462 463 bool isSCSrcV2INT32() const { 464 llvm_unreachable("cannot happen"); 465 return isSCSrcB32(); 466 } 467 468 bool isSSrcOrLdsB32() const { 469 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 470 isLiteralImm(MVT::i32) || isExpr(); 471 } 472 473 bool isVCSrcB32() const { 474 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 475 } 476 477 bool isVCSrcB64() const { 478 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 479 } 480 481 bool isVCSrcB16() const { 482 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 483 } 484 485 bool isVCSrcV2B16() const { 486 return isVCSrcB16(); 487 } 488 489 bool isVCSrcF32() const { 490 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 491 } 492 493 bool isVCSrcF64() const { 494 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 495 } 496 497 bool isVCSrcF16() const { 498 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 499 } 500 501 bool isVCSrcV2F16() const { 502 return isVCSrcF16(); 503 } 504 505 bool isVSrcB32() const { 506 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 507 } 508 509 bool isVSrcB64() const { 510 return isVCSrcF64() || isLiteralImm(MVT::i64); 511 } 512 513 bool isVSrcB16() const { 514 return isVCSrcB16() || isLiteralImm(MVT::i16); 515 } 516 517 bool isVSrcV2B16() const { 518 return isVSrcB16() || isLiteralImm(MVT::v2i16); 519 } 520 521 bool isVCSrcV2FP32() const { 522 return isVCSrcF64(); 523 } 524 525 bool isVSrcV2FP32() const { 526 return isVSrcF64() || isLiteralImm(MVT::v2f32); 527 } 528 529 bool isVCSrcV2INT32() const { 530 return isVCSrcB64(); 531 } 532 533 bool isVSrcV2INT32() const { 534 return isVSrcB64() || isLiteralImm(MVT::v2i32); 535 } 536 537 bool isVSrcF32() const { 538 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 539 } 540 541 bool isVSrcF64() const { 542 return isVCSrcF64() || isLiteralImm(MVT::f64); 543 } 544 545 bool isVSrcF16() const { 546 return isVCSrcF16() || isLiteralImm(MVT::f16); 547 } 548 549 bool isVSrcV2F16() const { 550 return isVSrcF16() || isLiteralImm(MVT::v2f16); 551 } 552 553 bool isVISrcB32() const { 554 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 555 } 556 557 bool isVISrcB16() const { 558 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 559 } 560 561 bool isVISrcV2B16() const { 562 return isVISrcB16(); 563 } 564 565 bool isVISrcF32() const { 566 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 567 } 568 569 bool isVISrcF16() const { 570 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 
571 } 572 573 bool isVISrcV2F16() const { 574 return isVISrcF16() || isVISrcB32(); 575 } 576 577 bool isVISrc_64B64() const { 578 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 579 } 580 581 bool isVISrc_64F64() const { 582 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 583 } 584 585 bool isVISrc_64V2FP32() const { 586 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 587 } 588 589 bool isVISrc_64V2INT32() const { 590 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 591 } 592 593 bool isVISrc_256B64() const { 594 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 595 } 596 597 bool isVISrc_256F64() const { 598 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 599 } 600 601 bool isVISrc_128B16() const { 602 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 603 } 604 605 bool isVISrc_128V2B16() const { 606 return isVISrc_128B16(); 607 } 608 609 bool isVISrc_128B32() const { 610 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 611 } 612 613 bool isVISrc_128F32() const { 614 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 615 } 616 617 bool isVISrc_256V2FP32() const { 618 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 619 } 620 621 bool isVISrc_256V2INT32() const { 622 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 623 } 624 625 bool isVISrc_512B32() const { 626 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 627 } 628 629 bool isVISrc_512B16() const { 630 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 631 } 632 633 bool isVISrc_512V2B16() const { 634 return isVISrc_512B16(); 635 } 636 637 bool isVISrc_512F32() const { 638 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 639 } 640 641 bool isVISrc_512F16() const { 642 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 643 } 644 645 bool isVISrc_512V2F16() const { 646 return isVISrc_512F16() || isVISrc_512B32(); 647 } 648 649 bool isVISrc_1024B32() const { 650 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 651 } 652 653 bool isVISrc_1024B16() const { 654 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 655 } 656 657 bool isVISrc_1024V2B16() const { 658 return isVISrc_1024B16(); 659 } 660 661 bool isVISrc_1024F32() const { 662 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 663 } 664 665 bool isVISrc_1024F16() const { 666 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 667 } 668 669 bool isVISrc_1024V2F16() const { 670 return isVISrc_1024F16() || isVISrc_1024B32(); 671 } 672 673 bool isAISrcB32() const { 674 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 675 } 676 677 bool isAISrcB16() const { 678 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 679 } 680 681 bool isAISrcV2B16() const { 682 return isAISrcB16(); 683 } 684 685 bool isAISrcF32() const { 686 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 687 } 688 689 bool isAISrcF16() const { 690 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 691 } 692 693 bool isAISrcV2F16() const { 694 return isAISrcF16() || isAISrcB32(); 695 } 696 697 bool isAISrc_64B64() const { 698 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 699 } 700 701 bool isAISrc_64F64() const { 702 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 703 } 704 705 bool isAISrc_128B32() const { 706 return 
isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 707 } 708 709 bool isAISrc_128B16() const { 710 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 711 } 712 713 bool isAISrc_128V2B16() const { 714 return isAISrc_128B16(); 715 } 716 717 bool isAISrc_128F32() const { 718 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 719 } 720 721 bool isAISrc_128F16() const { 722 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 723 } 724 725 bool isAISrc_128V2F16() const { 726 return isAISrc_128F16() || isAISrc_128B32(); 727 } 728 729 bool isVISrc_128F16() const { 730 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 731 } 732 733 bool isVISrc_128V2F16() const { 734 return isVISrc_128F16() || isVISrc_128B32(); 735 } 736 737 bool isAISrc_256B64() const { 738 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 739 } 740 741 bool isAISrc_256F64() const { 742 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 743 } 744 745 bool isAISrc_512B32() const { 746 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 747 } 748 749 bool isAISrc_512B16() const { 750 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 751 } 752 753 bool isAISrc_512V2B16() const { 754 return isAISrc_512B16(); 755 } 756 757 bool isAISrc_512F32() const { 758 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 759 } 760 761 bool isAISrc_512F16() const { 762 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 763 } 764 765 bool isAISrc_512V2F16() const { 766 return isAISrc_512F16() || isAISrc_512B32(); 767 } 768 769 bool isAISrc_1024B32() const { 770 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 771 } 772 773 bool isAISrc_1024B16() const { 774 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 775 } 776 777 bool isAISrc_1024V2B16() const { 778 return isAISrc_1024B16(); 779 } 780 781 bool isAISrc_1024F32() const { 782 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 783 } 784 785 bool isAISrc_1024F16() const { 786 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 787 } 788 789 bool isAISrc_1024V2F16() const { 790 return isAISrc_1024F16() || isAISrc_1024B32(); 791 } 792 793 bool isKImmFP32() const { 794 return isLiteralImm(MVT::f32); 795 } 796 797 bool isKImmFP16() const { 798 return isLiteralImm(MVT::f16); 799 } 800 801 bool isMem() const override { 802 return false; 803 } 804 805 bool isExpr() const { 806 return Kind == Expression; 807 } 808 809 bool isSoppBrTarget() const { 810 return isExpr() || isImm(); 811 } 812 813 bool isSWaitCnt() const; 814 bool isHwreg() const; 815 bool isSendMsg() const; 816 bool isSwizzle() const; 817 bool isSMRDOffset8() const; 818 bool isSMEMOffset() const; 819 bool isSMRDLiteralOffset() const; 820 bool isDPP8() const; 821 bool isDPPCtrl() const; 822 bool isBLGP() const; 823 bool isCBSZ() const; 824 bool isABID() const; 825 bool isGPRIdxMode() const; 826 bool isS16Imm() const; 827 bool isU16Imm() const; 828 bool isEndpgm() const; 829 830 StringRef getExpressionAsToken() const { 831 assert(isExpr()); 832 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 833 return S->getSymbol().getName(); 834 } 835 836 StringRef getToken() const { 837 assert(isToken()); 838 839 if (Kind == Expression) 840 return getExpressionAsToken(); 841 842 return StringRef(Tok.Data, Tok.Length); 843 } 844 845 int64_t getImm() const { 846 assert(isImm()); 847 return Imm.Val; 848 } 849 850 void setImm(int64_t 
Val) { 851 assert(isImm()); 852 Imm.Val = Val; 853 } 854 855 ImmTy getImmTy() const { 856 assert(isImm()); 857 return Imm.Type; 858 } 859 860 unsigned getReg() const override { 861 assert(isRegKind()); 862 return Reg.RegNo; 863 } 864 865 SMLoc getStartLoc() const override { 866 return StartLoc; 867 } 868 869 SMLoc getEndLoc() const override { 870 return EndLoc; 871 } 872 873 SMRange getLocRange() const { 874 return SMRange(StartLoc, EndLoc); 875 } 876 877 Modifiers getModifiers() const { 878 assert(isRegKind() || isImmTy(ImmTyNone)); 879 return isRegKind() ? Reg.Mods : Imm.Mods; 880 } 881 882 void setModifiers(Modifiers Mods) { 883 assert(isRegKind() || isImmTy(ImmTyNone)); 884 if (isRegKind()) 885 Reg.Mods = Mods; 886 else 887 Imm.Mods = Mods; 888 } 889 890 bool hasModifiers() const { 891 return getModifiers().hasModifiers(); 892 } 893 894 bool hasFPModifiers() const { 895 return getModifiers().hasFPModifiers(); 896 } 897 898 bool hasIntModifiers() const { 899 return getModifiers().hasIntModifiers(); 900 } 901 902 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 903 904 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 905 906 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 907 908 template <unsigned Bitwidth> 909 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 910 911 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 912 addKImmFPOperands<16>(Inst, N); 913 } 914 915 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 916 addKImmFPOperands<32>(Inst, N); 917 } 918 919 void addRegOperands(MCInst &Inst, unsigned N) const; 920 921 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 922 addRegOperands(Inst, N); 923 } 924 925 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 926 if (isRegKind()) 927 addRegOperands(Inst, N); 928 else if (isExpr()) 929 Inst.addOperand(MCOperand::createExpr(Expr)); 930 else 931 addImmOperands(Inst, N); 932 } 933 934 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 935 Modifiers Mods = getModifiers(); 936 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 937 if (isRegKind()) { 938 addRegOperands(Inst, N); 939 } else { 940 addImmOperands(Inst, N, false); 941 } 942 } 943 944 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 945 assert(!hasIntModifiers()); 946 addRegOrImmWithInputModsOperands(Inst, N); 947 } 948 949 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 950 assert(!hasFPModifiers()); 951 addRegOrImmWithInputModsOperands(Inst, N); 952 } 953 954 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 955 Modifiers Mods = getModifiers(); 956 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 957 assert(isRegKind()); 958 addRegOperands(Inst, N); 959 } 960 961 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 962 assert(!hasIntModifiers()); 963 addRegWithInputModsOperands(Inst, N); 964 } 965 966 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 967 assert(!hasFPModifiers()); 968 addRegWithInputModsOperands(Inst, N); 969 } 970 971 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 972 if (isImm()) 973 addImmOperands(Inst, N); 974 else { 975 assert(isExpr()); 976 Inst.addOperand(MCOperand::createExpr(Expr)); 977 } 978 } 979 980 static void printImmTy(raw_ostream& OS, ImmTy Type) { 981 switch (Type) { 982 case ImmTyNone: OS << "None"; break; 983 case ImmTyGDS: OS << "GDS"; break; 
984 case ImmTyLDS: OS << "LDS"; break; 985 case ImmTyOffen: OS << "Offen"; break; 986 case ImmTyIdxen: OS << "Idxen"; break; 987 case ImmTyAddr64: OS << "Addr64"; break; 988 case ImmTyOffset: OS << "Offset"; break; 989 case ImmTyInstOffset: OS << "InstOffset"; break; 990 case ImmTyOffset0: OS << "Offset0"; break; 991 case ImmTyOffset1: OS << "Offset1"; break; 992 case ImmTyCPol: OS << "CPol"; break; 993 case ImmTySWZ: OS << "SWZ"; break; 994 case ImmTyTFE: OS << "TFE"; break; 995 case ImmTyD16: OS << "D16"; break; 996 case ImmTyFORMAT: OS << "FORMAT"; break; 997 case ImmTyClampSI: OS << "ClampSI"; break; 998 case ImmTyOModSI: OS << "OModSI"; break; 999 case ImmTyDPP8: OS << "DPP8"; break; 1000 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1001 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1002 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1003 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1004 case ImmTyDppFi: OS << "FI"; break; 1005 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1006 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1007 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1008 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1009 case ImmTyDMask: OS << "DMask"; break; 1010 case ImmTyDim: OS << "Dim"; break; 1011 case ImmTyUNorm: OS << "UNorm"; break; 1012 case ImmTyDA: OS << "DA"; break; 1013 case ImmTyR128A16: OS << "R128A16"; break; 1014 case ImmTyA16: OS << "A16"; break; 1015 case ImmTyLWE: OS << "LWE"; break; 1016 case ImmTyOff: OS << "Off"; break; 1017 case ImmTyExpTgt: OS << "ExpTgt"; break; 1018 case ImmTyExpCompr: OS << "ExpCompr"; break; 1019 case ImmTyExpVM: OS << "ExpVM"; break; 1020 case ImmTyHwreg: OS << "Hwreg"; break; 1021 case ImmTySendMsg: OS << "SendMsg"; break; 1022 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1023 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1024 case ImmTyAttrChan: OS << "AttrChan"; break; 1025 case ImmTyOpSel: OS << "OpSel"; break; 1026 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1027 case ImmTyNegLo: OS << "NegLo"; break; 1028 case ImmTyNegHi: OS << "NegHi"; break; 1029 case ImmTySwizzle: OS << "Swizzle"; break; 1030 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1031 case ImmTyHigh: OS << "High"; break; 1032 case ImmTyBLGP: OS << "BLGP"; break; 1033 case ImmTyCBSZ: OS << "CBSZ"; break; 1034 case ImmTyABID: OS << "ABID"; break; 1035 case ImmTyEndpgm: OS << "Endpgm"; break; 1036 } 1037 } 1038 1039 void print(raw_ostream &OS) const override { 1040 switch (Kind) { 1041 case Register: 1042 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1043 break; 1044 case Immediate: 1045 OS << '<' << getImm(); 1046 if (getImmTy() != ImmTyNone) { 1047 OS << " type: "; printImmTy(OS, getImmTy()); 1048 } 1049 OS << " mods: " << Imm.Mods << '>'; 1050 break; 1051 case Token: 1052 OS << '\'' << getToken() << '\''; 1053 break; 1054 case Expression: 1055 OS << "<expr " << *Expr << '>'; 1056 break; 1057 } 1058 } 1059 1060 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1061 int64_t Val, SMLoc Loc, 1062 ImmTy Type = ImmTyNone, 1063 bool IsFPImm = false) { 1064 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1065 Op->Imm.Val = Val; 1066 Op->Imm.IsFPImm = IsFPImm; 1067 Op->Imm.Kind = ImmKindTyNone; 1068 Op->Imm.Type = Type; 1069 Op->Imm.Mods = Modifiers(); 1070 Op->StartLoc = Loc; 1071 Op->EndLoc = Loc; 1072 return Op; 1073 } 1074 1075 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1076 StringRef Str, SMLoc Loc, 1077 bool HasExplicitEncodingSize = true) { 
1078 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1079 Res->Tok.Data = Str.data(); 1080 Res->Tok.Length = Str.size(); 1081 Res->StartLoc = Loc; 1082 Res->EndLoc = Loc; 1083 return Res; 1084 } 1085 1086 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1087 unsigned RegNo, SMLoc S, 1088 SMLoc E) { 1089 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1090 Op->Reg.RegNo = RegNo; 1091 Op->Reg.Mods = Modifiers(); 1092 Op->StartLoc = S; 1093 Op->EndLoc = E; 1094 return Op; 1095 } 1096 1097 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1098 const class MCExpr *Expr, SMLoc S) { 1099 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1100 Op->Expr = Expr; 1101 Op->StartLoc = S; 1102 Op->EndLoc = S; 1103 return Op; 1104 } 1105 }; 1106 1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1108 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1109 return OS; 1110 } 1111 1112 //===----------------------------------------------------------------------===// 1113 // AsmParser 1114 //===----------------------------------------------------------------------===// 1115 1116 // Holds info related to the current kernel, e.g. count of SGPRs used. 1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1118 // .amdgpu_hsa_kernel or at EOF. 1119 class KernelScopeInfo { 1120 int SgprIndexUnusedMin = -1; 1121 int VgprIndexUnusedMin = -1; 1122 MCContext *Ctx = nullptr; 1123 1124 void usesSgprAt(int i) { 1125 if (i >= SgprIndexUnusedMin) { 1126 SgprIndexUnusedMin = ++i; 1127 if (Ctx) { 1128 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1129 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1130 } 1131 } 1132 } 1133 1134 void usesVgprAt(int i) { 1135 if (i >= VgprIndexUnusedMin) { 1136 VgprIndexUnusedMin = ++i; 1137 if (Ctx) { 1138 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1139 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1140 } 1141 } 1142 } 1143 1144 public: 1145 KernelScopeInfo() = default; 1146 1147 void initialize(MCContext &Context) { 1148 Ctx = &Context; 1149 usesSgprAt(SgprIndexUnusedMin = -1); 1150 usesVgprAt(VgprIndexUnusedMin = -1); 1151 } 1152 1153 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1154 switch (RegKind) { 1155 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1156 case IS_AGPR: // fall through 1157 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1158 default: break; 1159 } 1160 } 1161 }; 1162 1163 class AMDGPUAsmParser : public MCTargetAsmParser { 1164 MCAsmParser &Parser; 1165 1166 // Number of extra operands parsed after the first optional operand. 1167 // This may be necessary to skip hardcoded mandatory operands. 1168 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1169 1170 unsigned ForcedEncodingSize = 0; 1171 bool ForcedDPP = false; 1172 bool ForcedSDWA = false; 1173 KernelScopeInfo KernelScope; 1174 unsigned CPolSeen; 1175 1176 /// @name Auto-generated Match Functions 1177 /// { 1178 1179 #define GET_ASSEMBLER_HEADER 1180 #include "AMDGPUGenAsmMatcher.inc" 1181 1182 /// } 1183 1184 private: 1185 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1186 bool OutOfRangeError(SMRange Range); 1187 /// Calculate VGPR/SGPR blocks required for given target, reserved 1188 /// registers, and user-specified NextFreeXGPR values. 
1189 /// 1190 /// \param Features [in] Target features, used for bug corrections. 1191 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1192 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1193 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1194 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1195 /// descriptor field, if valid. 1196 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1197 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1198 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1199 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1200 /// \param VGPRBlocks [out] Result VGPR block count. 1201 /// \param SGPRBlocks [out] Result SGPR block count. 1202 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1203 bool FlatScrUsed, bool XNACKUsed, 1204 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1205 SMRange VGPRRange, unsigned NextFreeSGPR, 1206 SMRange SGPRRange, unsigned &VGPRBlocks, 1207 unsigned &SGPRBlocks); 1208 bool ParseDirectiveAMDGCNTarget(); 1209 bool ParseDirectiveAMDHSAKernel(); 1210 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1211 bool ParseDirectiveHSACodeObjectVersion(); 1212 bool ParseDirectiveHSACodeObjectISA(); 1213 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1214 bool ParseDirectiveAMDKernelCodeT(); 1215 // TODO: Possibly make subtargetHasRegister const. 1216 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo); 1217 bool ParseDirectiveAMDGPUHsaKernel(); 1218 1219 bool ParseDirectiveISAVersion(); 1220 bool ParseDirectiveHSAMetadata(); 1221 bool ParseDirectivePALMetadataBegin(); 1222 bool ParseDirectivePALMetadata(); 1223 bool ParseDirectiveAMDGPULDS(); 1224 1225 /// Common code to parse out a block of text (typically YAML) between start and 1226 /// end directives. 
1227 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1228 const char *AssemblerDirectiveEnd, 1229 std::string &CollectString); 1230 1231 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1232 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1233 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1234 unsigned &RegNum, unsigned &RegWidth, 1235 bool RestoreOnFailure = false); 1236 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1237 unsigned &RegNum, unsigned &RegWidth, 1238 SmallVectorImpl<AsmToken> &Tokens); 1239 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1240 unsigned &RegWidth, 1241 SmallVectorImpl<AsmToken> &Tokens); 1242 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1243 unsigned &RegWidth, 1244 SmallVectorImpl<AsmToken> &Tokens); 1245 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1246 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1247 bool ParseRegRange(unsigned& Num, unsigned& Width); 1248 unsigned getRegularReg(RegisterKind RegKind, 1249 unsigned RegNum, 1250 unsigned RegWidth, 1251 SMLoc Loc); 1252 1253 bool isRegister(); 1254 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1255 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1256 void initializeGprCountSymbol(RegisterKind RegKind); 1257 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1258 unsigned RegWidth); 1259 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1260 bool IsAtomic, bool IsLds = false); 1261 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1262 bool IsGdsHardcoded); 1263 1264 public: 1265 enum AMDGPUMatchResultTy { 1266 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1267 }; 1268 enum OperandMode { 1269 OperandMode_Default, 1270 OperandMode_NSA, 1271 }; 1272 1273 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1274 1275 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1276 const MCInstrInfo &MII, 1277 const MCTargetOptions &Options) 1278 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1279 MCAsmParserExtension::Initialize(Parser); 1280 1281 if (getFeatureBits().none()) { 1282 // Set default features. 1283 copySTI().ToggleFeature("southern-islands"); 1284 } 1285 1286 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1287 1288 { 1289 // TODO: make those pre-defined variables read-only. 1290 // Currently there is none suitable machinery in the core llvm-mc for this. 1291 // MCSymbol::isRedefinable is intended for another purpose, and 1292 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
1293 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1294 MCContext &Ctx = getContext(); 1295 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1296 MCSymbol *Sym = 1297 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1298 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1299 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1303 } else { 1304 MCSymbol *Sym = 1305 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1306 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1307 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1311 } 1312 if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) { 1313 initializeGprCountSymbol(IS_VGPR); 1314 initializeGprCountSymbol(IS_SGPR); 1315 } else 1316 KernelScope.initialize(getContext()); 1317 } 1318 } 1319 1320 bool hasMIMG_R128() const { 1321 return AMDGPU::hasMIMG_R128(getSTI()); 1322 } 1323 1324 bool hasPackedD16() const { 1325 return AMDGPU::hasPackedD16(getSTI()); 1326 } 1327 1328 bool hasGFX10A16() const { 1329 return AMDGPU::hasGFX10A16(getSTI()); 1330 } 1331 1332 bool isSI() const { 1333 return AMDGPU::isSI(getSTI()); 1334 } 1335 1336 bool isCI() const { 1337 return AMDGPU::isCI(getSTI()); 1338 } 1339 1340 bool isVI() const { 1341 return AMDGPU::isVI(getSTI()); 1342 } 1343 1344 bool isGFX9() const { 1345 return AMDGPU::isGFX9(getSTI()); 1346 } 1347 1348 bool isGFX90A() const { 1349 return AMDGPU::isGFX90A(getSTI()); 1350 } 1351 1352 bool isGFX9Plus() const { 1353 return AMDGPU::isGFX9Plus(getSTI()); 1354 } 1355 1356 bool isGFX10() const { 1357 return AMDGPU::isGFX10(getSTI()); 1358 } 1359 1360 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1361 1362 bool isGFX10_BEncoding() const { 1363 return AMDGPU::isGFX10_BEncoding(getSTI()); 1364 } 1365 1366 bool hasInv2PiInlineImm() const { 1367 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1368 } 1369 1370 bool hasFlatOffsets() const { 1371 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1372 } 1373 1374 bool hasSGPR102_SGPR103() const { 1375 return !isVI() && !isGFX9(); 1376 } 1377 1378 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1379 1380 bool hasIntClamp() const { 1381 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1382 } 1383 1384 AMDGPUTargetStreamer &getTargetStreamer() { 1385 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1386 return static_cast<AMDGPUTargetStreamer &>(TS); 1387 } 1388 1389 const MCRegisterInfo *getMRI() const { 1390 // We need this const_cast because for some reason getContext() is not const 1391 // in MCAsmParser. 
1392 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1393 } 1394 1395 const MCInstrInfo *getMII() const { 1396 return &MII; 1397 } 1398 1399 const FeatureBitset &getFeatureBits() const { 1400 return getSTI().getFeatureBits(); 1401 } 1402 1403 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1404 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1405 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1406 1407 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1408 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1409 bool isForcedDPP() const { return ForcedDPP; } 1410 bool isForcedSDWA() const { return ForcedSDWA; } 1411 ArrayRef<unsigned> getMatchedVariants() const; 1412 StringRef getMatchedVariantName() const; 1413 1414 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1415 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1416 bool RestoreOnFailure); 1417 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1418 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1419 SMLoc &EndLoc) override; 1420 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1421 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1422 unsigned Kind) override; 1423 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1424 OperandVector &Operands, MCStreamer &Out, 1425 uint64_t &ErrorInfo, 1426 bool MatchingInlineAsm) override; 1427 bool ParseDirective(AsmToken DirectiveID) override; 1428 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1429 OperandMode Mode = OperandMode_Default); 1430 StringRef parseMnemonicSuffix(StringRef Name); 1431 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1432 SMLoc NameLoc, OperandVector &Operands) override; 1433 //bool ProcessInstruction(MCInst &Inst); 1434 1435 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1436 1437 OperandMatchResultTy 1438 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1439 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1440 bool (*ConvertResult)(int64_t &) = nullptr); 1441 1442 OperandMatchResultTy 1443 parseOperandArrayWithPrefix(const char *Prefix, 1444 OperandVector &Operands, 1445 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1446 bool (*ConvertResult)(int64_t&) = nullptr); 1447 1448 OperandMatchResultTy 1449 parseNamedBit(StringRef Name, OperandVector &Operands, 1450 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1451 OperandMatchResultTy parseCPol(OperandVector &Operands); 1452 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1453 StringRef &Value, 1454 SMLoc &StringLoc); 1455 1456 bool isModifier(); 1457 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1458 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1459 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1460 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1461 bool parseSP3NegModifier(); 1462 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1463 OperandMatchResultTy parseReg(OperandVector &Operands); 1464 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1465 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool 
AllowImm = true); 1466 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1467 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1468 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1469 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1470 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1471 OperandMatchResultTy parseUfmt(int64_t &Format); 1472 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1473 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1474 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1475 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1476 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1477 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1478 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1479 1480 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1481 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1482 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1483 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1484 1485 bool parseCnt(int64_t &IntVal); 1486 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1487 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1488 1489 private: 1490 struct OperandInfoTy { 1491 SMLoc Loc; 1492 int64_t Id; 1493 bool IsSymbolic = false; 1494 bool IsDefined = false; 1495 1496 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1497 }; 1498 1499 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1500 bool validateSendMsg(const OperandInfoTy &Msg, 1501 const OperandInfoTy &Op, 1502 const OperandInfoTy &Stream); 1503 1504 bool parseHwregBody(OperandInfoTy &HwReg, 1505 OperandInfoTy &Offset, 1506 OperandInfoTy &Width); 1507 bool validateHwreg(const OperandInfoTy &HwReg, 1508 const OperandInfoTy &Offset, 1509 const OperandInfoTy &Width); 1510 1511 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1512 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1513 1514 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1515 const OperandVector &Operands) const; 1516 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1517 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1518 SMLoc getLitLoc(const OperandVector &Operands) const; 1519 SMLoc getConstLoc(const OperandVector &Operands) const; 1520 1521 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1522 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1523 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1524 bool validateSOPLiteral(const MCInst &Inst) const; 1525 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1526 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1527 bool validateIntClampSupported(const MCInst &Inst); 1528 bool validateMIMGAtomicDMask(const MCInst &Inst); 1529 bool validateMIMGGatherDMask(const MCInst &Inst); 1530 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1531 bool validateMIMGDataSize(const MCInst &Inst); 1532 bool validateMIMGAddrSize(const 
MCInst &Inst); 1533 bool validateMIMGD16(const MCInst &Inst); 1534 bool validateMIMGDim(const MCInst &Inst); 1535 bool validateMIMGMSAA(const MCInst &Inst); 1536 bool validateOpSel(const MCInst &Inst); 1537 bool validateVccOperand(unsigned Reg) const; 1538 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1539 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateAGPRLdSt(const MCInst &Inst) const; 1541 bool validateVGPRAlign(const MCInst &Inst) const; 1542 bool validateDivScale(const MCInst &Inst); 1543 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1544 const SMLoc &IDLoc); 1545 Optional<StringRef> validateLdsDirect(const MCInst &Inst); 1546 unsigned getConstantBusLimit(unsigned Opcode) const; 1547 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1548 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1549 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1550 1551 bool isSupportedMnemo(StringRef Mnemo, 1552 const FeatureBitset &FBS); 1553 bool isSupportedMnemo(StringRef Mnemo, 1554 const FeatureBitset &FBS, 1555 ArrayRef<unsigned> Variants); 1556 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1557 1558 bool isId(const StringRef Id) const; 1559 bool isId(const AsmToken &Token, const StringRef Id) const; 1560 bool isToken(const AsmToken::TokenKind Kind) const; 1561 bool trySkipId(const StringRef Id); 1562 bool trySkipId(const StringRef Pref, const StringRef Id); 1563 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1564 bool trySkipToken(const AsmToken::TokenKind Kind); 1565 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1566 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1567 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1568 1569 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1570 AsmToken::TokenKind getTokenKind() const; 1571 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1572 bool parseExpr(OperandVector &Operands); 1573 StringRef getTokenStr() const; 1574 AsmToken peekToken(); 1575 AsmToken getToken() const; 1576 SMLoc getLoc() const; 1577 void lex(); 1578 1579 public: 1580 void onBeginOfFile() override; 1581 1582 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1583 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1584 1585 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1586 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1587 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1588 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1589 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1590 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1591 1592 bool parseSwizzleOperand(int64_t &Op, 1593 const unsigned MinVal, 1594 const unsigned MaxVal, 1595 const StringRef ErrMsg, 1596 SMLoc &Loc); 1597 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1598 const unsigned MinVal, 1599 const unsigned MaxVal, 1600 const StringRef ErrMsg); 1601 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1602 bool parseSwizzleOffset(int64_t &Imm); 1603 bool parseSwizzleMacro(int64_t &Imm); 1604 bool parseSwizzleQuadPerm(int64_t &Imm); 1605 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1606 bool parseSwizzleBroadcast(int64_t &Imm); 1607 bool parseSwizzleSwap(int64_t &Imm); 1608 bool parseSwizzleReverse(int64_t &Imm); 1609 1610 
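  // VGPR indexing mode helpers; these handle the symbolic form used by
  // s_set_gpr_idx_on, e.g. gpr_idx(SRC0,DST) (the operand list shown here is
  // only an illustrative example).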
OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1611 int64_t parseGPRIdxMacro(); 1612 1613 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); } 1614 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); } 1615 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); } 1616 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1617 1618 AMDGPUOperand::Ptr defaultCPol() const; 1619 1620 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1621 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1622 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1623 AMDGPUOperand::Ptr defaultFlatOffset() const; 1624 1625 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1626 1627 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1628 OptionalImmIndexMap &OptionalIdx); 1629 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1630 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1631 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1632 1633 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1634 1635 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1636 bool IsAtomic = false); 1637 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1638 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1639 1640 void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands); 1641 1642 bool parseDimId(unsigned &Encoding); 1643 OperandMatchResultTy parseDim(OperandVector &Operands); 1644 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1645 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1646 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1647 int64_t parseDPPCtrlSel(StringRef Ctrl); 1648 int64_t parseDPPCtrlPerm(); 1649 AMDGPUOperand::Ptr defaultRowMask() const; 1650 AMDGPUOperand::Ptr defaultBankMask() const; 1651 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1652 AMDGPUOperand::Ptr defaultFI() const; 1653 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1654 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1655 1656 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1657 AMDGPUOperand::ImmTy Type); 1658 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1659 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1660 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1661 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1662 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1663 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1664 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1665 uint64_t BasicInstType, 1666 bool SkipDstVcc = false, 1667 bool SkipSrcVcc = false); 1668 1669 AMDGPUOperand::Ptr defaultBLGP() const; 1670 AMDGPUOperand::Ptr defaultCBSZ() const; 1671 AMDGPUOperand::Ptr defaultABID() const; 1672 1673 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1674 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1675 }; 1676 1677 struct OptionalOperand { 1678 const char *Name; 1679 AMDGPUOperand::ImmTy Type; 1680 bool IsBit; 1681 bool (*ConvertResult)(int64_t&); 1682 }; 1683 1684 } // end anonymous namespace 1685 1686 // May be called with integer type with equivalent bitwidth. 
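// For example, a 4-byte operand maps to IEEEsingle() here whether the operand
// type is f32 or i32.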
1687 static const fltSemantics *getFltSemantics(unsigned Size) { 1688 switch (Size) { 1689 case 4: 1690 return &APFloat::IEEEsingle(); 1691 case 8: 1692 return &APFloat::IEEEdouble(); 1693 case 2: 1694 return &APFloat::IEEEhalf(); 1695 default: 1696 llvm_unreachable("unsupported fp type"); 1697 } 1698 } 1699 1700 static const fltSemantics *getFltSemantics(MVT VT) { 1701 return getFltSemantics(VT.getSizeInBits() / 8); 1702 } 1703 1704 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1705 switch (OperandType) { 1706 case AMDGPU::OPERAND_REG_IMM_INT32: 1707 case AMDGPU::OPERAND_REG_IMM_FP32: 1708 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1709 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1710 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1711 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1712 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 1713 case AMDGPU::OPERAND_REG_IMM_V2FP32: 1714 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 1715 case AMDGPU::OPERAND_REG_IMM_V2INT32: 1716 return &APFloat::IEEEsingle(); 1717 case AMDGPU::OPERAND_REG_IMM_INT64: 1718 case AMDGPU::OPERAND_REG_IMM_FP64: 1719 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1720 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1721 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1722 return &APFloat::IEEEdouble(); 1723 case AMDGPU::OPERAND_REG_IMM_INT16: 1724 case AMDGPU::OPERAND_REG_IMM_FP16: 1725 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1726 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1727 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1728 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1729 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1730 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1731 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1732 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1733 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1734 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1735 return &APFloat::IEEEhalf(); 1736 default: 1737 llvm_unreachable("unsupported fp type"); 1738 } 1739 } 1740 1741 //===----------------------------------------------------------------------===// 1742 // Operand 1743 //===----------------------------------------------------------------------===// 1744 1745 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1746 bool Lost; 1747 1748 // Convert literal to single precision 1749 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1750 APFloat::rmNearestTiesToEven, 1751 &Lost); 1752 // We allow precision lost but not overflow or underflow 1753 if (Status != APFloat::opOK && 1754 Lost && 1755 ((Status & APFloat::opOverflow) != 0 || 1756 (Status & APFloat::opUnderflow) != 0)) { 1757 return false; 1758 } 1759 1760 return true; 1761 } 1762 1763 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1764 return isUIntN(Size, Val) || isIntN(Size, Val); 1765 } 1766 1767 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1768 if (VT.getScalarType() == MVT::i16) { 1769 // FP immediate values are broken. 1770 return isInlinableIntLiteral(Val); 1771 } 1772 1773 // f16/v2f16 operands work correctly for all values. 1774 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1775 } 1776 1777 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1778 1779 // This is a hack to enable named inline values like 1780 // shared_base with both 32-bit and 64-bit operands. 1781 // Note that these values are defined as 1782 // 32-bit operands only. 1783 if (isInlineValue()) { 1784 return true; 1785 } 1786 1787 if (!isImmTy(ImmTyNone)) { 1788 // Only plain immediates are inlinable (e.g. 
"clamp" attribute is not) 1789 return false; 1790 } 1791 // TODO: We should avoid using host float here. It would be better to 1792 // check the float bit values which is what a few other places do. 1793 // We've had bot failures before due to weird NaN support on mips hosts. 1794 1795 APInt Literal(64, Imm.Val); 1796 1797 if (Imm.IsFPImm) { // We got fp literal token 1798 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1799 return AMDGPU::isInlinableLiteral64(Imm.Val, 1800 AsmParser->hasInv2PiInlineImm()); 1801 } 1802 1803 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1804 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1805 return false; 1806 1807 if (type.getScalarSizeInBits() == 16) { 1808 return isInlineableLiteralOp16( 1809 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1810 type, AsmParser->hasInv2PiInlineImm()); 1811 } 1812 1813 // Check if single precision literal is inlinable 1814 return AMDGPU::isInlinableLiteral32( 1815 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1816 AsmParser->hasInv2PiInlineImm()); 1817 } 1818 1819 // We got int literal token. 1820 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1821 return AMDGPU::isInlinableLiteral64(Imm.Val, 1822 AsmParser->hasInv2PiInlineImm()); 1823 } 1824 1825 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1826 return false; 1827 } 1828 1829 if (type.getScalarSizeInBits() == 16) { 1830 return isInlineableLiteralOp16( 1831 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1832 type, AsmParser->hasInv2PiInlineImm()); 1833 } 1834 1835 return AMDGPU::isInlinableLiteral32( 1836 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1837 AsmParser->hasInv2PiInlineImm()); 1838 } 1839 1840 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1841 // Check that this immediate can be added as literal 1842 if (!isImmTy(ImmTyNone)) { 1843 return false; 1844 } 1845 1846 if (!Imm.IsFPImm) { 1847 // We got int literal token. 1848 1849 if (type == MVT::f64 && hasFPModifiers()) { 1850 // Cannot apply fp modifiers to int literals preserving the same semantics 1851 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1852 // disable these cases. 1853 return false; 1854 } 1855 1856 unsigned Size = type.getSizeInBits(); 1857 if (Size == 64) 1858 Size = 32; 1859 1860 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1861 // types. 1862 return isSafeTruncation(Imm.Val, Size); 1863 } 1864 1865 // We got fp literal token 1866 if (type == MVT::f64) { // Expected 64-bit fp operand 1867 // We would set low 64-bits of literal to zeroes but we accept this literals 1868 return true; 1869 } 1870 1871 if (type == MVT::i64) { // Expected 64-bit int operand 1872 // We don't allow fp literals in 64-bit integer instructions. It is 1873 // unclear how we should encode them. 1874 return false; 1875 } 1876 1877 // We allow fp literals with f16x2 operands assuming that the specified 1878 // literal goes into the lower half and the upper half is zero. We also 1879 // require that the literal may be losslesly converted to f16. 1880 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1881 (type == MVT::v2i16)? MVT::i16 : 1882 (type == MVT::v2f32)? 
MVT::f32 : type; 1883 1884 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1885 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1886 } 1887 1888 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1889 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1890 } 1891 1892 bool AMDGPUOperand::isVRegWithInputMods() const { 1893 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1894 // GFX90A allows DPP on 64-bit operands. 1895 (isRegClass(AMDGPU::VReg_64RegClassID) && 1896 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1897 } 1898 1899 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1900 if (AsmParser->isVI()) 1901 return isVReg32(); 1902 else if (AsmParser->isGFX9Plus()) 1903 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1904 else 1905 return false; 1906 } 1907 1908 bool AMDGPUOperand::isSDWAFP16Operand() const { 1909 return isSDWAOperand(MVT::f16); 1910 } 1911 1912 bool AMDGPUOperand::isSDWAFP32Operand() const { 1913 return isSDWAOperand(MVT::f32); 1914 } 1915 1916 bool AMDGPUOperand::isSDWAInt16Operand() const { 1917 return isSDWAOperand(MVT::i16); 1918 } 1919 1920 bool AMDGPUOperand::isSDWAInt32Operand() const { 1921 return isSDWAOperand(MVT::i32); 1922 } 1923 1924 bool AMDGPUOperand::isBoolReg() const { 1925 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1926 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1927 } 1928 1929 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1930 { 1931 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1932 assert(Size == 2 || Size == 4 || Size == 8); 1933 1934 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1935 1936 if (Imm.Mods.Abs) { 1937 Val &= ~FpSignMask; 1938 } 1939 if (Imm.Mods.Neg) { 1940 Val ^= FpSignMask; 1941 } 1942 1943 return Val; 1944 } 1945 1946 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1947 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1948 Inst.getNumOperands())) { 1949 addLiteralImmOperand(Inst, Imm.Val, 1950 ApplyModifiers & 1951 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1952 } else { 1953 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1954 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1955 setImmKindNone(); 1956 } 1957 } 1958 1959 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1960 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1961 auto OpNum = Inst.getNumOperands(); 1962 // Check that this operand accepts literals 1963 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1964 1965 if (ApplyModifiers) { 1966 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1967 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1968 Val = applyInputFPModifiers(Val, Size); 1969 } 1970 1971 APInt Literal(64, Val); 1972 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1973 1974 if (Imm.IsFPImm) { // We got fp literal token 1975 switch (OpTy) { 1976 case AMDGPU::OPERAND_REG_IMM_INT64: 1977 case AMDGPU::OPERAND_REG_IMM_FP64: 1978 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1979 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1980 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1981 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1982 AsmParser->hasInv2PiInlineImm())) { 1983 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1984 setImmKindConst(); 1985 return; 1986 } 1987 1988 // Non-inlineable 1989 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1990 // For fp operands we check if low 32 bits are zeros 1991 if (Literal.getLoBits(32) != 0) { 1992 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1993 "Can't encode literal as exact 64-bit floating-point operand. " 1994 "Low 32-bits will be set to zero"); 1995 } 1996 1997 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1998 setImmKindLiteral(); 1999 return; 2000 } 2001 2002 // We don't allow fp literals in 64-bit integer instructions. It is 2003 // unclear how we should encode them. This case should be checked earlier 2004 // in predicate methods (isLiteralImm()) 2005 llvm_unreachable("fp literal in 64-bit integer instruction."); 2006 2007 case AMDGPU::OPERAND_REG_IMM_INT32: 2008 case AMDGPU::OPERAND_REG_IMM_FP32: 2009 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2010 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2011 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2012 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2013 case AMDGPU::OPERAND_REG_IMM_INT16: 2014 case AMDGPU::OPERAND_REG_IMM_FP16: 2015 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2016 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2017 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2018 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2019 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2020 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2021 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2022 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2023 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2024 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2025 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2026 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2027 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2028 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2029 bool lost; 2030 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2031 // Convert literal to single precision 2032 FPLiteral.convert(*getOpFltSemantics(OpTy), 2033 APFloat::rmNearestTiesToEven, &lost); 2034 // We allow precision lost but not overflow or underflow. This should be 2035 // checked earlier in isLiteralImm() 2036 2037 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2038 Inst.addOperand(MCOperand::createImm(ImmVal)); 2039 setImmKindLiteral(); 2040 return; 2041 } 2042 default: 2043 llvm_unreachable("invalid operand size"); 2044 } 2045 2046 return; 2047 } 2048 2049 // We got int literal token. 2050 // Only sign extend inline immediates. 
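// Illustrative examples, assuming a 16-bit operand type: the literal -1 lies
// in the inline-constant range and is recorded via setImmKindConst(), whereas
// 100 is not inlinable and is emitted as a literal (Val & 0xffff) via
// setImmKindLiteral().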
2051 switch (OpTy) { 2052 case AMDGPU::OPERAND_REG_IMM_INT32: 2053 case AMDGPU::OPERAND_REG_IMM_FP32: 2054 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2055 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2056 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2057 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2058 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2059 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2060 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2061 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2062 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2063 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2064 if (isSafeTruncation(Val, 32) && 2065 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2066 AsmParser->hasInv2PiInlineImm())) { 2067 Inst.addOperand(MCOperand::createImm(Val)); 2068 setImmKindConst(); 2069 return; 2070 } 2071 2072 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2073 setImmKindLiteral(); 2074 return; 2075 2076 case AMDGPU::OPERAND_REG_IMM_INT64: 2077 case AMDGPU::OPERAND_REG_IMM_FP64: 2078 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2079 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2080 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2081 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2082 Inst.addOperand(MCOperand::createImm(Val)); 2083 setImmKindConst(); 2084 return; 2085 } 2086 2087 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2088 setImmKindLiteral(); 2089 return; 2090 2091 case AMDGPU::OPERAND_REG_IMM_INT16: 2092 case AMDGPU::OPERAND_REG_IMM_FP16: 2093 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2094 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2095 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2096 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2097 if (isSafeTruncation(Val, 16) && 2098 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2099 AsmParser->hasInv2PiInlineImm())) { 2100 Inst.addOperand(MCOperand::createImm(Val)); 2101 setImmKindConst(); 2102 return; 2103 } 2104 2105 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2106 setImmKindLiteral(); 2107 return; 2108 2109 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2110 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2111 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2112 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2113 assert(isSafeTruncation(Val, 16)); 2114 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2115 AsmParser->hasInv2PiInlineImm())); 2116 2117 Inst.addOperand(MCOperand::createImm(Val)); 2118 return; 2119 } 2120 default: 2121 llvm_unreachable("invalid operand size"); 2122 } 2123 } 2124 2125 template <unsigned Bitwidth> 2126 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2127 APInt Literal(64, Imm.Val); 2128 setImmKindNone(); 2129 2130 if (!Imm.IsFPImm) { 2131 // We got int literal token. 
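// An integer token is simply truncated to the KImm width below, e.g. with
// Bitwidth == 16 only the low 16 bits of the value are encoded (illustrative
// note).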
2132 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2133 return; 2134 } 2135 2136 bool Lost; 2137 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2138 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2139 APFloat::rmNearestTiesToEven, &Lost); 2140 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2141 } 2142 2143 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2144 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2145 } 2146 2147 static bool isInlineValue(unsigned Reg) { 2148 switch (Reg) { 2149 case AMDGPU::SRC_SHARED_BASE: 2150 case AMDGPU::SRC_SHARED_LIMIT: 2151 case AMDGPU::SRC_PRIVATE_BASE: 2152 case AMDGPU::SRC_PRIVATE_LIMIT: 2153 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2154 return true; 2155 case AMDGPU::SRC_VCCZ: 2156 case AMDGPU::SRC_EXECZ: 2157 case AMDGPU::SRC_SCC: 2158 return true; 2159 case AMDGPU::SGPR_NULL: 2160 return true; 2161 default: 2162 return false; 2163 } 2164 } 2165 2166 bool AMDGPUOperand::isInlineValue() const { 2167 return isRegKind() && ::isInlineValue(getReg()); 2168 } 2169 2170 //===----------------------------------------------------------------------===// 2171 // AsmParser 2172 //===----------------------------------------------------------------------===// 2173 2174 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2175 if (Is == IS_VGPR) { 2176 switch (RegWidth) { 2177 default: return -1; 2178 case 1: return AMDGPU::VGPR_32RegClassID; 2179 case 2: return AMDGPU::VReg_64RegClassID; 2180 case 3: return AMDGPU::VReg_96RegClassID; 2181 case 4: return AMDGPU::VReg_128RegClassID; 2182 case 5: return AMDGPU::VReg_160RegClassID; 2183 case 6: return AMDGPU::VReg_192RegClassID; 2184 case 8: return AMDGPU::VReg_256RegClassID; 2185 case 16: return AMDGPU::VReg_512RegClassID; 2186 case 32: return AMDGPU::VReg_1024RegClassID; 2187 } 2188 } else if (Is == IS_TTMP) { 2189 switch (RegWidth) { 2190 default: return -1; 2191 case 1: return AMDGPU::TTMP_32RegClassID; 2192 case 2: return AMDGPU::TTMP_64RegClassID; 2193 case 4: return AMDGPU::TTMP_128RegClassID; 2194 case 8: return AMDGPU::TTMP_256RegClassID; 2195 case 16: return AMDGPU::TTMP_512RegClassID; 2196 } 2197 } else if (Is == IS_SGPR) { 2198 switch (RegWidth) { 2199 default: return -1; 2200 case 1: return AMDGPU::SGPR_32RegClassID; 2201 case 2: return AMDGPU::SGPR_64RegClassID; 2202 case 3: return AMDGPU::SGPR_96RegClassID; 2203 case 4: return AMDGPU::SGPR_128RegClassID; 2204 case 5: return AMDGPU::SGPR_160RegClassID; 2205 case 6: return AMDGPU::SGPR_192RegClassID; 2206 case 8: return AMDGPU::SGPR_256RegClassID; 2207 case 16: return AMDGPU::SGPR_512RegClassID; 2208 } 2209 } else if (Is == IS_AGPR) { 2210 switch (RegWidth) { 2211 default: return -1; 2212 case 1: return AMDGPU::AGPR_32RegClassID; 2213 case 2: return AMDGPU::AReg_64RegClassID; 2214 case 3: return AMDGPU::AReg_96RegClassID; 2215 case 4: return AMDGPU::AReg_128RegClassID; 2216 case 5: return AMDGPU::AReg_160RegClassID; 2217 case 6: return AMDGPU::AReg_192RegClassID; 2218 case 8: return AMDGPU::AReg_256RegClassID; 2219 case 16: return AMDGPU::AReg_512RegClassID; 2220 case 32: return AMDGPU::AReg_1024RegClassID; 2221 } 2222 } 2223 return -1; 2224 } 2225 2226 static unsigned getSpecialRegForName(StringRef RegName) { 2227 return StringSwitch<unsigned>(RegName) 2228 .Case("exec", AMDGPU::EXEC) 2229 .Case("vcc", AMDGPU::VCC) 2230 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2231 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2232 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2233 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2234 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2235 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2236 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2237 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2238 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2239 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2240 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2241 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2242 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2243 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2244 .Case("m0", AMDGPU::M0) 2245 .Case("vccz", AMDGPU::SRC_VCCZ) 2246 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2247 .Case("execz", AMDGPU::SRC_EXECZ) 2248 .Case("src_execz", AMDGPU::SRC_EXECZ) 2249 .Case("scc", AMDGPU::SRC_SCC) 2250 .Case("src_scc", AMDGPU::SRC_SCC) 2251 .Case("tba", AMDGPU::TBA) 2252 .Case("tma", AMDGPU::TMA) 2253 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2254 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2255 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2256 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2257 .Case("vcc_lo", AMDGPU::VCC_LO) 2258 .Case("vcc_hi", AMDGPU::VCC_HI) 2259 .Case("exec_lo", AMDGPU::EXEC_LO) 2260 .Case("exec_hi", AMDGPU::EXEC_HI) 2261 .Case("tma_lo", AMDGPU::TMA_LO) 2262 .Case("tma_hi", AMDGPU::TMA_HI) 2263 .Case("tba_lo", AMDGPU::TBA_LO) 2264 .Case("tba_hi", AMDGPU::TBA_HI) 2265 .Case("pc", AMDGPU::PC_REG) 2266 .Case("null", AMDGPU::SGPR_NULL) 2267 .Default(AMDGPU::NoRegister); 2268 } 2269 2270 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2271 SMLoc &EndLoc, bool RestoreOnFailure) { 2272 auto R = parseRegister(); 2273 if (!R) return true; 2274 assert(R->isReg()); 2275 RegNo = R->getReg(); 2276 StartLoc = R->getStartLoc(); 2277 EndLoc = R->getEndLoc(); 2278 return false; 2279 } 2280 2281 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2282 SMLoc &EndLoc) { 2283 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2284 } 2285 2286 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2287 SMLoc &StartLoc, 2288 SMLoc &EndLoc) { 2289 bool Result = 2290 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2291 bool PendingErrors = getParser().hasPendingError(); 2292 getParser().clearPendingErrors(); 2293 if (PendingErrors) 2294 return MatchOperand_ParseFail; 2295 if (Result) 2296 return MatchOperand_NoMatch; 2297 return MatchOperand_Success; 2298 } 2299 2300 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2301 RegisterKind RegKind, unsigned Reg1, 2302 SMLoc Loc) { 2303 switch (RegKind) { 2304 case IS_SPECIAL: 2305 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2306 Reg = AMDGPU::EXEC; 2307 RegWidth = 2; 2308 return true; 2309 } 2310 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2311 Reg = AMDGPU::FLAT_SCR; 2312 RegWidth = 2; 2313 return true; 2314 } 2315 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2316 Reg = AMDGPU::XNACK_MASK; 2317 RegWidth = 2; 2318 return true; 2319 } 2320 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2321 Reg = AMDGPU::VCC; 2322 RegWidth = 2; 2323 return true; 2324 } 2325 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2326 Reg = AMDGPU::TBA; 2327 RegWidth = 2; 2328 return true; 2329 } 2330 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2331 Reg = AMDGPU::TMA; 2332 
RegWidth = 2; 2333 return true; 2334 } 2335 Error(Loc, "register does not fit in the list"); 2336 return false; 2337 case IS_VGPR: 2338 case IS_SGPR: 2339 case IS_AGPR: 2340 case IS_TTMP: 2341 if (Reg1 != Reg + RegWidth) { 2342 Error(Loc, "registers in a list must have consecutive indices"); 2343 return false; 2344 } 2345 RegWidth++; 2346 return true; 2347 default: 2348 llvm_unreachable("unexpected register kind"); 2349 } 2350 } 2351 2352 struct RegInfo { 2353 StringLiteral Name; 2354 RegisterKind Kind; 2355 }; 2356 2357 static constexpr RegInfo RegularRegisters[] = { 2358 {{"v"}, IS_VGPR}, 2359 {{"s"}, IS_SGPR}, 2360 {{"ttmp"}, IS_TTMP}, 2361 {{"acc"}, IS_AGPR}, 2362 {{"a"}, IS_AGPR}, 2363 }; 2364 2365 static bool isRegularReg(RegisterKind Kind) { 2366 return Kind == IS_VGPR || 2367 Kind == IS_SGPR || 2368 Kind == IS_TTMP || 2369 Kind == IS_AGPR; 2370 } 2371 2372 static const RegInfo* getRegularRegInfo(StringRef Str) { 2373 for (const RegInfo &Reg : RegularRegisters) 2374 if (Str.startswith(Reg.Name)) 2375 return &Reg; 2376 return nullptr; 2377 } 2378 2379 static bool getRegNum(StringRef Str, unsigned& Num) { 2380 return !Str.getAsInteger(10, Num); 2381 } 2382 2383 bool 2384 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2385 const AsmToken &NextToken) const { 2386 2387 // A list of consecutive registers: [s0,s1,s2,s3] 2388 if (Token.is(AsmToken::LBrac)) 2389 return true; 2390 2391 if (!Token.is(AsmToken::Identifier)) 2392 return false; 2393 2394 // A single register like s0 or a range of registers like s[0:1] 2395 2396 StringRef Str = Token.getString(); 2397 const RegInfo *Reg = getRegularRegInfo(Str); 2398 if (Reg) { 2399 StringRef RegName = Reg->Name; 2400 StringRef RegSuffix = Str.substr(RegName.size()); 2401 if (!RegSuffix.empty()) { 2402 unsigned Num; 2403 // A single register with an index: rXX 2404 if (getRegNum(RegSuffix, Num)) 2405 return true; 2406 } else { 2407 // A range of registers: r[XX:YY]. 2408 if (NextToken.is(AsmToken::LBrac)) 2409 return true; 2410 } 2411 } 2412 2413 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2414 } 2415 2416 bool 2417 AMDGPUAsmParser::isRegister() 2418 { 2419 return isRegister(getToken(), peekToken()); 2420 } 2421 2422 unsigned 2423 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2424 unsigned RegNum, 2425 unsigned RegWidth, 2426 SMLoc Loc) { 2427 2428 assert(isRegularReg(RegKind)); 2429 2430 unsigned AlignSize = 1; 2431 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2432 // SGPR and TTMP registers must be aligned. 2433 // Max required alignment is 4 dwords. 
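// For example, s[2:3] needs an even first index and s[4:7] needs a first
// index that is a multiple of 4, while VGPR ranges such as v[3:4] have no
// alignment requirement (illustrative sketch of the rule below).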
2434 AlignSize = std::min(RegWidth, 4u); 2435 } 2436 2437 if (RegNum % AlignSize != 0) { 2438 Error(Loc, "invalid register alignment"); 2439 return AMDGPU::NoRegister; 2440 } 2441 2442 unsigned RegIdx = RegNum / AlignSize; 2443 int RCID = getRegClass(RegKind, RegWidth); 2444 if (RCID == -1) { 2445 Error(Loc, "invalid or unsupported register size"); 2446 return AMDGPU::NoRegister; 2447 } 2448 2449 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2450 const MCRegisterClass RC = TRI->getRegClass(RCID); 2451 if (RegIdx >= RC.getNumRegs()) { 2452 Error(Loc, "register index is out of range"); 2453 return AMDGPU::NoRegister; 2454 } 2455 2456 return RC.getRegister(RegIdx); 2457 } 2458 2459 bool 2460 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2461 int64_t RegLo, RegHi; 2462 if (!skipToken(AsmToken::LBrac, "missing register index")) 2463 return false; 2464 2465 SMLoc FirstIdxLoc = getLoc(); 2466 SMLoc SecondIdxLoc; 2467 2468 if (!parseExpr(RegLo)) 2469 return false; 2470 2471 if (trySkipToken(AsmToken::Colon)) { 2472 SecondIdxLoc = getLoc(); 2473 if (!parseExpr(RegHi)) 2474 return false; 2475 } else { 2476 RegHi = RegLo; 2477 } 2478 2479 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2480 return false; 2481 2482 if (!isUInt<32>(RegLo)) { 2483 Error(FirstIdxLoc, "invalid register index"); 2484 return false; 2485 } 2486 2487 if (!isUInt<32>(RegHi)) { 2488 Error(SecondIdxLoc, "invalid register index"); 2489 return false; 2490 } 2491 2492 if (RegLo > RegHi) { 2493 Error(FirstIdxLoc, "first register index should not exceed second index"); 2494 return false; 2495 } 2496 2497 Num = static_cast<unsigned>(RegLo); 2498 Width = (RegHi - RegLo) + 1; 2499 return true; 2500 } 2501 2502 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2503 unsigned &RegNum, unsigned &RegWidth, 2504 SmallVectorImpl<AsmToken> &Tokens) { 2505 assert(isToken(AsmToken::Identifier)); 2506 unsigned Reg = getSpecialRegForName(getTokenStr()); 2507 if (Reg) { 2508 RegNum = 0; 2509 RegWidth = 1; 2510 RegKind = IS_SPECIAL; 2511 Tokens.push_back(getToken()); 2512 lex(); // skip register name 2513 } 2514 return Reg; 2515 } 2516 2517 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2518 unsigned &RegNum, unsigned &RegWidth, 2519 SmallVectorImpl<AsmToken> &Tokens) { 2520 assert(isToken(AsmToken::Identifier)); 2521 StringRef RegName = getTokenStr(); 2522 auto Loc = getLoc(); 2523 2524 const RegInfo *RI = getRegularRegInfo(RegName); 2525 if (!RI) { 2526 Error(Loc, "invalid register name"); 2527 return AMDGPU::NoRegister; 2528 } 2529 2530 Tokens.push_back(getToken()); 2531 lex(); // skip register name 2532 2533 RegKind = RI->Kind; 2534 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2535 if (!RegSuffix.empty()) { 2536 // Single 32-bit register: vXX. 2537 if (!getRegNum(RegSuffix, RegNum)) { 2538 Error(Loc, "invalid register index"); 2539 return AMDGPU::NoRegister; 2540 } 2541 RegWidth = 1; 2542 } else { 2543 // Range of registers: v[XX:YY]. ":YY" is optional. 
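// e.g. "v[8:11]" yields RegNum = 8 and RegWidth = 4, while "v[8]" is the
// single-register form equivalent to "v8" (illustrative).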
2544 if (!ParseRegRange(RegNum, RegWidth)) 2545 return AMDGPU::NoRegister; 2546 } 2547 2548 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2549 } 2550 2551 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2552 unsigned &RegWidth, 2553 SmallVectorImpl<AsmToken> &Tokens) { 2554 unsigned Reg = AMDGPU::NoRegister; 2555 auto ListLoc = getLoc(); 2556 2557 if (!skipToken(AsmToken::LBrac, 2558 "expected a register or a list of registers")) { 2559 return AMDGPU::NoRegister; 2560 } 2561 2562 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2563 2564 auto Loc = getLoc(); 2565 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2566 return AMDGPU::NoRegister; 2567 if (RegWidth != 1) { 2568 Error(Loc, "expected a single 32-bit register"); 2569 return AMDGPU::NoRegister; 2570 } 2571 2572 for (; trySkipToken(AsmToken::Comma); ) { 2573 RegisterKind NextRegKind; 2574 unsigned NextReg, NextRegNum, NextRegWidth; 2575 Loc = getLoc(); 2576 2577 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2578 NextRegNum, NextRegWidth, 2579 Tokens)) { 2580 return AMDGPU::NoRegister; 2581 } 2582 if (NextRegWidth != 1) { 2583 Error(Loc, "expected a single 32-bit register"); 2584 return AMDGPU::NoRegister; 2585 } 2586 if (NextRegKind != RegKind) { 2587 Error(Loc, "registers in a list must be of the same kind"); 2588 return AMDGPU::NoRegister; 2589 } 2590 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2591 return AMDGPU::NoRegister; 2592 } 2593 2594 if (!skipToken(AsmToken::RBrac, 2595 "expected a comma or a closing square bracket")) { 2596 return AMDGPU::NoRegister; 2597 } 2598 2599 if (isRegularReg(RegKind)) 2600 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2601 2602 return Reg; 2603 } 2604 2605 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2606 unsigned &RegNum, unsigned &RegWidth, 2607 SmallVectorImpl<AsmToken> &Tokens) { 2608 auto Loc = getLoc(); 2609 Reg = AMDGPU::NoRegister; 2610 2611 if (isToken(AsmToken::Identifier)) { 2612 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2613 if (Reg == AMDGPU::NoRegister) 2614 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2615 } else { 2616 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2617 } 2618 2619 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2620 if (Reg == AMDGPU::NoRegister) { 2621 assert(Parser.hasPendingError()); 2622 return false; 2623 } 2624 2625 if (!subtargetHasRegister(*TRI, Reg)) { 2626 if (Reg == AMDGPU::SGPR_NULL) { 2627 Error(Loc, "'null' operand is not supported on this GPU"); 2628 } else { 2629 Error(Loc, "register not available on this GPU"); 2630 } 2631 return false; 2632 } 2633 2634 return true; 2635 } 2636 2637 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2638 unsigned &RegNum, unsigned &RegWidth, 2639 bool RestoreOnFailure /*=false*/) { 2640 Reg = AMDGPU::NoRegister; 2641 2642 SmallVector<AsmToken, 1> Tokens; 2643 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2644 if (RestoreOnFailure) { 2645 while (!Tokens.empty()) { 2646 getLexer().UnLex(Tokens.pop_back_val()); 2647 } 2648 } 2649 return true; 2650 } 2651 return false; 2652 } 2653 2654 Optional<StringRef> 2655 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2656 switch (RegKind) { 2657 case IS_VGPR: 2658 return StringRef(".amdgcn.next_free_vgpr"); 2659 case IS_SGPR: 2660 return StringRef(".amdgcn.next_free_sgpr"); 2661 default: 2662 return None; 2663 } 2664 } 2665 2666 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2667 auto SymbolName = getGprCountSymbolName(RegKind); 2668 assert(SymbolName && "initializing invalid register kind"); 2669 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2670 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2671 } 2672 2673 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2674 unsigned DwordRegIndex, 2675 unsigned RegWidth) { 2676 // Symbols are only defined for GCN targets 2677 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2678 return true; 2679 2680 auto SymbolName = getGprCountSymbolName(RegKind); 2681 if (!SymbolName) 2682 return true; 2683 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2684 2685 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2686 int64_t OldCount; 2687 2688 if (!Sym->isVariable()) 2689 return !Error(getLoc(), 2690 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2691 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2692 return !Error( 2693 getLoc(), 2694 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2695 2696 if (OldCount <= NewMax) 2697 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2698 2699 return true; 2700 } 2701 2702 std::unique_ptr<AMDGPUOperand> 2703 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2704 const auto &Tok = getToken(); 2705 SMLoc StartLoc = Tok.getLoc(); 2706 SMLoc EndLoc = Tok.getEndLoc(); 2707 RegisterKind RegKind; 2708 unsigned Reg, RegNum, RegWidth; 2709 2710 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2711 return nullptr; 2712 } 2713 if (isHsaAbiVersion3Or4(&getSTI())) { 2714 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2715 return nullptr; 2716 } else 2717 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2718 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2719 } 2720 2721 OperandMatchResultTy 2722 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2723 // TODO: add syntactic sugar for 1/(2*PI) 2724 2725 assert(!isRegister()); 2726 assert(!isModifier()); 2727 2728 const auto& Tok = getToken(); 2729 const auto& NextTok = peekToken(); 2730 bool IsReal = Tok.is(AsmToken::Real); 2731 SMLoc S = getLoc(); 2732 bool Negate = false; 2733 2734 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2735 lex(); 2736 IsReal = true; 2737 Negate = true; 2738 } 2739 2740 if (IsReal) { 2741 // Floating-point expressions are not supported. 2742 // Can only allow floating-point literals with an 2743 // optional sign. 2744 2745 StringRef Num = getTokenStr(); 2746 lex(); 2747 2748 APFloat RealVal(APFloat::IEEEdouble()); 2749 auto roundMode = APFloat::rmNearestTiesToEven; 2750 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2751 return MatchOperand_ParseFail; 2752 } 2753 if (Negate) 2754 RealVal.changeSign(); 2755 2756 Operands.push_back( 2757 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2758 AMDGPUOperand::ImmTyNone, true)); 2759 2760 return MatchOperand_Success; 2761 2762 } else { 2763 int64_t IntVal; 2764 const MCExpr *Expr; 2765 SMLoc S = getLoc(); 2766 2767 if (HasSP3AbsModifier) { 2768 // This is a workaround for handling expressions 2769 // as arguments of SP3 'abs' modifier, for example: 2770 // |1.0| 2771 // |-1| 2772 // |1+x| 2773 // This syntax is not compatible with syntax of standard 2774 // MC expressions (due to the trailing '|'). 
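// The generic MC expression parser would treat the trailing '|' as a
// bitwise-OR operator; parsing only a primary expression below is meant to
// avoid consuming it (explanatory note).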
2775 SMLoc EndLoc; 2776 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2777 return MatchOperand_ParseFail; 2778 } else { 2779 if (Parser.parseExpression(Expr)) 2780 return MatchOperand_ParseFail; 2781 } 2782 2783 if (Expr->evaluateAsAbsolute(IntVal)) { 2784 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2785 } else { 2786 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2787 } 2788 2789 return MatchOperand_Success; 2790 } 2791 2792 return MatchOperand_NoMatch; 2793 } 2794 2795 OperandMatchResultTy 2796 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2797 if (!isRegister()) 2798 return MatchOperand_NoMatch; 2799 2800 if (auto R = parseRegister()) { 2801 assert(R->isReg()); 2802 Operands.push_back(std::move(R)); 2803 return MatchOperand_Success; 2804 } 2805 return MatchOperand_ParseFail; 2806 } 2807 2808 OperandMatchResultTy 2809 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2810 auto res = parseReg(Operands); 2811 if (res != MatchOperand_NoMatch) { 2812 return res; 2813 } else if (isModifier()) { 2814 return MatchOperand_NoMatch; 2815 } else { 2816 return parseImm(Operands, HasSP3AbsMod); 2817 } 2818 } 2819 2820 bool 2821 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2822 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2823 const auto &str = Token.getString(); 2824 return str == "abs" || str == "neg" || str == "sext"; 2825 } 2826 return false; 2827 } 2828 2829 bool 2830 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2831 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2832 } 2833 2834 bool 2835 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2836 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2837 } 2838 2839 bool 2840 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2841 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2842 } 2843 2844 // Check if this is an operand modifier or an opcode modifier 2845 // which may look like an expression but it is not. We should 2846 // avoid parsing these modifiers as expressions. Currently 2847 // recognized sequences are: 2848 // |...| 2849 // abs(...) 2850 // neg(...) 2851 // sext(...) 2852 // -reg 2853 // -|...| 2854 // -abs(...) 2855 // name:... 2856 // Note that simple opcode modifiers like 'gds' may be parsed as 2857 // expressions; this is a special case. See getExpressionAsToken. 2858 // 2859 bool 2860 AMDGPUAsmParser::isModifier() { 2861 2862 AsmToken Tok = getToken(); 2863 AsmToken NextToken[2]; 2864 peekTokens(NextToken); 2865 2866 return isOperandModifier(Tok, NextToken[0]) || 2867 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2868 isOpcodeModifierWithVal(Tok, NextToken[0]); 2869 } 2870 2871 // Check if the current token is an SP3 'neg' modifier. 2872 // Currently this modifier is allowed in the following context: 2873 // 2874 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2875 // 2. Before an 'abs' modifier: -abs(...) 2876 // 3. Before an SP3 'abs' modifier: -|...| 2877 // 2878 // In all other cases "-" is handled as a part 2879 // of an expression that follows the sign. 
2880 //
2881 // Note: When "-" is followed by an integer literal,
2882 // this is interpreted as integer negation rather
2883 // than a floating-point NEG modifier applied to the literal.
2884 // Besides being counter-intuitive, such use of a floating-point
2885 // NEG modifier would have resulted in a different meaning
2886 // of integer literals used with VOP1/2/C and VOP3,
2887 // for example:
2888 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2889 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2890 // Negative fp literals with preceding "-" are
2891 // handled likewise for uniformity.
2892 //
2893 bool
2894 AMDGPUAsmParser::parseSP3NegModifier() {
2895 
2896   AsmToken NextToken[2];
2897   peekTokens(NextToken);
2898 
2899   if (isToken(AsmToken::Minus) &&
2900       (isRegister(NextToken[0], NextToken[1]) ||
2901        NextToken[0].is(AsmToken::Pipe) ||
2902        isId(NextToken[0], "abs"))) {
2903     lex();
2904     return true;
2905   }
2906 
2907   return false;
2908 }
2909 
2910 OperandMatchResultTy
2911 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2912                                               bool AllowImm) {
2913   bool Neg, SP3Neg;
2914   bool Abs, SP3Abs;
2915   SMLoc Loc;
2916 
2917   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2918   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2919     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2920     return MatchOperand_ParseFail;
2921   }
2922 
2923   SP3Neg = parseSP3NegModifier();
2924 
2925   Loc = getLoc();
2926   Neg = trySkipId("neg");
2927   if (Neg && SP3Neg) {
2928     Error(Loc, "expected register or immediate");
2929     return MatchOperand_ParseFail;
2930   }
2931   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2932     return MatchOperand_ParseFail;
2933 
2934   Abs = trySkipId("abs");
2935   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2936     return MatchOperand_ParseFail;
2937 
2938   Loc = getLoc();
2939   SP3Abs = trySkipToken(AsmToken::Pipe);
2940   if (Abs && SP3Abs) {
2941     Error(Loc, "expected register or immediate");
2942     return MatchOperand_ParseFail;
2943   }
2944 
2945   OperandMatchResultTy Res;
2946   if (AllowImm) {
2947     Res = parseRegOrImm(Operands, SP3Abs);
2948   } else {
2949     Res = parseReg(Operands);
2950   }
2951   if (Res != MatchOperand_Success) {
2952     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2953 } 2954 2955 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2956 return MatchOperand_ParseFail; 2957 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2958 return MatchOperand_ParseFail; 2959 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2960 return MatchOperand_ParseFail; 2961 2962 AMDGPUOperand::Modifiers Mods; 2963 Mods.Abs = Abs || SP3Abs; 2964 Mods.Neg = Neg || SP3Neg; 2965 2966 if (Mods.hasFPModifiers()) { 2967 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2968 if (Op.isExpr()) { 2969 Error(Op.getStartLoc(), "expected an absolute expression"); 2970 return MatchOperand_ParseFail; 2971 } 2972 Op.setModifiers(Mods); 2973 } 2974 return MatchOperand_Success; 2975 } 2976 2977 OperandMatchResultTy 2978 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2979 bool AllowImm) { 2980 bool Sext = trySkipId("sext"); 2981 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2982 return MatchOperand_ParseFail; 2983 2984 OperandMatchResultTy Res; 2985 if (AllowImm) { 2986 Res = parseRegOrImm(Operands); 2987 } else { 2988 Res = parseReg(Operands); 2989 } 2990 if (Res != MatchOperand_Success) { 2991 return Sext? MatchOperand_ParseFail : Res; 2992 } 2993 2994 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2995 return MatchOperand_ParseFail; 2996 2997 AMDGPUOperand::Modifiers Mods; 2998 Mods.Sext = Sext; 2999 3000 if (Mods.hasIntModifiers()) { 3001 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3002 if (Op.isExpr()) { 3003 Error(Op.getStartLoc(), "expected an absolute expression"); 3004 return MatchOperand_ParseFail; 3005 } 3006 Op.setModifiers(Mods); 3007 } 3008 3009 return MatchOperand_Success; 3010 } 3011 3012 OperandMatchResultTy 3013 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3014 return parseRegOrImmWithFPInputMods(Operands, false); 3015 } 3016 3017 OperandMatchResultTy 3018 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3019 return parseRegOrImmWithIntInputMods(Operands, false); 3020 } 3021 3022 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3023 auto Loc = getLoc(); 3024 if (trySkipId("off")) { 3025 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3026 AMDGPUOperand::ImmTyOff, false)); 3027 return MatchOperand_Success; 3028 } 3029 3030 if (!isRegister()) 3031 return MatchOperand_NoMatch; 3032 3033 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3034 if (Reg) { 3035 Operands.push_back(std::move(Reg)); 3036 return MatchOperand_Success; 3037 } 3038 3039 return MatchOperand_ParseFail; 3040 3041 } 3042 3043 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3044 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3045 3046 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3047 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3048 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3049 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3050 return Match_InvalidOperand; 3051 3052 if ((TSFlags & SIInstrFlags::VOP3) && 3053 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3054 getForcedEncodingSize() != 64) 3055 return Match_PreferE32; 3056 3057 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3058 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3059 // v_mac_f32/16 allow only dst_sel == DWORD; 3060 auto OpNum = 3061 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3062 const auto &Op = Inst.getOperand(OpNum); 3063 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3064 return Match_InvalidOperand; 3065 } 3066 } 3067 3068 return Match_Success; 3069 } 3070 3071 static ArrayRef<unsigned> getAllVariants() { 3072 static const unsigned Variants[] = { 3073 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3074 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3075 }; 3076 3077 return makeArrayRef(Variants); 3078 } 3079 3080 // What asm variants we should check 3081 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3082 if (getForcedEncodingSize() == 32) { 3083 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3084 return makeArrayRef(Variants); 3085 } 3086 3087 if (isForcedVOP3()) { 3088 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3089 return makeArrayRef(Variants); 3090 } 3091 3092 if (isForcedSDWA()) { 3093 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3094 AMDGPUAsmVariants::SDWA9}; 3095 return makeArrayRef(Variants); 3096 } 3097 3098 if (isForcedDPP()) { 3099 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3100 return makeArrayRef(Variants); 3101 } 3102 3103 return getAllVariants(); 3104 } 3105 3106 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3107 if (getForcedEncodingSize() == 32) 3108 return "e32"; 3109 3110 if (isForcedVOP3()) 3111 return "e64"; 3112 3113 if (isForcedSDWA()) 3114 return "sdwa"; 3115 3116 if (isForcedDPP()) 3117 return "dpp"; 3118 3119 return ""; 3120 } 3121 3122 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3123 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3124 const unsigned Num = Desc.getNumImplicitUses(); 3125 for (unsigned i = 0; i < Num; ++i) { 3126 unsigned Reg = Desc.ImplicitUses[i]; 3127 switch (Reg) { 3128 case AMDGPU::FLAT_SCR: 3129 case AMDGPU::VCC: 3130 case AMDGPU::VCC_LO: 3131 case AMDGPU::VCC_HI: 3132 case AMDGPU::M0: 3133 return Reg; 3134 default: 3135 break; 3136 } 3137 } 3138 return AMDGPU::NoRegister; 3139 } 3140 3141 // NB: This code is correct only when used to check constant 3142 // bus limitations because GFX7 support no f16 inline constants. 3143 // Note that there are no cases when a GFX7 opcode violates 3144 // constant bus limitations due to the use of an f16 constant. 
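// For example, a 64-bit operand holding 1.0 (0x3FF0000000000000) is reported
// as inlinable, while an arbitrary 64-bit constant is not and counts against
// the constant bus as a literal (illustrative).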
3145 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3146 unsigned OpIdx) const { 3147 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3148 3149 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3150 return false; 3151 } 3152 3153 const MCOperand &MO = Inst.getOperand(OpIdx); 3154 3155 int64_t Val = MO.getImm(); 3156 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3157 3158 switch (OpSize) { // expected operand size 3159 case 8: 3160 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3161 case 4: 3162 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3163 case 2: { 3164 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3165 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3166 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3167 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3168 return AMDGPU::isInlinableIntLiteral(Val); 3169 3170 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3171 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3172 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3173 return AMDGPU::isInlinableIntLiteralV216(Val); 3174 3175 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3176 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3177 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3178 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3179 3180 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3181 } 3182 default: 3183 llvm_unreachable("invalid operand size"); 3184 } 3185 } 3186 3187 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3188 if (!isGFX10Plus()) 3189 return 1; 3190 3191 switch (Opcode) { 3192 // 64-bit shift instructions can use only one scalar value input 3193 case AMDGPU::V_LSHLREV_B64_e64: 3194 case AMDGPU::V_LSHLREV_B64_gfx10: 3195 case AMDGPU::V_LSHRREV_B64_e64: 3196 case AMDGPU::V_LSHRREV_B64_gfx10: 3197 case AMDGPU::V_ASHRREV_I64_e64: 3198 case AMDGPU::V_ASHRREV_I64_gfx10: 3199 case AMDGPU::V_LSHL_B64_e64: 3200 case AMDGPU::V_LSHR_B64_e64: 3201 case AMDGPU::V_ASHR_I64_e64: 3202 return 1; 3203 default: 3204 return 2; 3205 } 3206 } 3207 3208 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3209 const MCOperand &MO = Inst.getOperand(OpIdx); 3210 if (MO.isImm()) { 3211 return !isInlineConstant(Inst, OpIdx); 3212 } else if (MO.isReg()) { 3213 auto Reg = MO.getReg(); 3214 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3215 auto PReg = mc2PseudoReg(Reg); 3216 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3217 } else { 3218 return true; 3219 } 3220 } 3221 3222 bool 3223 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3224 const OperandVector &Operands) { 3225 const unsigned Opcode = Inst.getOpcode(); 3226 const MCInstrDesc &Desc = MII.get(Opcode); 3227 unsigned LastSGPR = AMDGPU::NoRegister; 3228 unsigned ConstantBusUseCount = 0; 3229 unsigned NumLiterals = 0; 3230 unsigned LiteralSize; 3231 3232 if (Desc.TSFlags & 3233 (SIInstrFlags::VOPC | 3234 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3235 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3236 SIInstrFlags::SDWA)) { 3237 // Check special imm operands (used by madmk, etc) 3238 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3239 ++ConstantBusUseCount; 3240 } 3241 3242 SmallDenseSet<unsigned> SGPRsUsed; 3243 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3244 if (SGPRUsed != AMDGPU::NoRegister) { 3245 SGPRsUsed.insert(SGPRUsed); 3246 ++ConstantBusUseCount; 3247 } 3248 3249 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
3250     const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
3251     const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);
3252 
3253     const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };
3254 
3255     for (int OpIdx : OpIndices) {
3256       if (OpIdx == -1) break;
3257 
3258       const MCOperand &MO = Inst.getOperand(OpIdx);
3259       if (usesConstantBus(Inst, OpIdx)) {
3260         if (MO.isReg()) {
3261           LastSGPR = mc2PseudoReg(MO.getReg());
3262           // Pairs of registers with partial intersections such as
3263           //   s0, s[0:1]
3264           //   flat_scratch_lo, flat_scratch
3265           //   flat_scratch_lo, flat_scratch_hi
3266           // are theoretically valid but they are disabled anyway.
3267           // Note that this code mimics SIInstrInfo::verifyInstruction
3268           if (!SGPRsUsed.count(LastSGPR)) {
3269             SGPRsUsed.insert(LastSGPR);
3270             ++ConstantBusUseCount;
3271           }
3272         } else { // Expression or a literal
3273 
3274           if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
3275             continue; // special operand like VINTERP attr_chan
3276 
3277           // An instruction may use only one literal.
3278           // This has been validated in a previous step.
3279           // See validateVOP3Literal.
3280           // This literal may be used as more than one operand.
3281           // If all these operands are of the same size,
3282           // this literal counts as one scalar value.
3283           // Otherwise it counts as 2 scalar values.
3284           // See "GFX10 Shader Programming", section 3.6.2.3.
3285 
3286           unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
3287           if (Size < 4) Size = 4;
3288 
3289           if (NumLiterals == 0) {
3290             NumLiterals = 1;
3291             LiteralSize = Size;
3292           } else if (LiteralSize != Size) {
3293             NumLiterals = 2;
3294           }
3295         }
3296       }
3297     }
3298   }
3299   ConstantBusUseCount += NumLiterals;
3300 
3301   if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
3302     return true;
3303 
3304   SMLoc LitLoc = getLitLoc(Operands);
3305   SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
3306   SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ?
RegLoc : LitLoc; 3307 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3308 return false; 3309 } 3310 3311 bool 3312 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3313 const OperandVector &Operands) { 3314 const unsigned Opcode = Inst.getOpcode(); 3315 const MCInstrDesc &Desc = MII.get(Opcode); 3316 3317 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3318 if (DstIdx == -1 || 3319 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3320 return true; 3321 } 3322 3323 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3324 3325 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3326 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3327 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3328 3329 assert(DstIdx != -1); 3330 const MCOperand &Dst = Inst.getOperand(DstIdx); 3331 assert(Dst.isReg()); 3332 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3333 3334 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3335 3336 for (int SrcIdx : SrcIndices) { 3337 if (SrcIdx == -1) break; 3338 const MCOperand &Src = Inst.getOperand(SrcIdx); 3339 if (Src.isReg()) { 3340 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3341 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3342 Error(getRegLoc(SrcReg, Operands), 3343 "destination must be different than all sources"); 3344 return false; 3345 } 3346 } 3347 } 3348 3349 return true; 3350 } 3351 3352 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3353 3354 const unsigned Opc = Inst.getOpcode(); 3355 const MCInstrDesc &Desc = MII.get(Opc); 3356 3357 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3358 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3359 assert(ClampIdx != -1); 3360 return Inst.getOperand(ClampIdx).getImm() == 0; 3361 } 3362 3363 return true; 3364 } 3365 3366 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3367 3368 const unsigned Opc = Inst.getOpcode(); 3369 const MCInstrDesc &Desc = MII.get(Opc); 3370 3371 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3372 return true; 3373 3374 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3375 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3376 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3377 3378 assert(VDataIdx != -1); 3379 3380 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3381 return true; 3382 3383 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3384 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3385 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3386 if (DMask == 0) 3387 DMask = 1; 3388 3389 unsigned DataSize = 3390 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3391 if (hasPackedD16()) { 3392 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3393 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3394 DataSize = (DataSize + 1) / 2; 3395 } 3396 3397 return (VDataSize / 4) == DataSize + TFESize; 3398 } 3399 3400 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3401 const unsigned Opc = Inst.getOpcode(); 3402 const MCInstrDesc &Desc = MII.get(Opc); 3403 3404 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3405 return true; 3406 3407 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3408 3409 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3410 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3411 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3412 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3413 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3414 3415 assert(VAddr0Idx != -1); 3416 assert(SrsrcIdx != -1); 3417 assert(SrsrcIdx > VAddr0Idx); 3418 3419 if (DimIdx == -1) 3420 return true; // intersect_ray 3421 3422 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3423 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3424 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3425 unsigned VAddrSize = 3426 IsNSA ? SrsrcIdx - VAddr0Idx 3427 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3428 3429 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3430 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3431 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3432 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3433 if (!IsNSA) { 3434 if (AddrSize > 8) 3435 AddrSize = 16; 3436 else if (AddrSize > 4) 3437 AddrSize = 8; 3438 } 3439 3440 return VAddrSize == AddrSize; 3441 } 3442 3443 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3444 3445 const unsigned Opc = Inst.getOpcode(); 3446 const MCInstrDesc &Desc = MII.get(Opc); 3447 3448 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3449 return true; 3450 if (!Desc.mayLoad() || !Desc.mayStore()) 3451 return true; // Not atomic 3452 3453 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3454 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3455 3456 // This is an incomplete check because image_atomic_cmpswap 3457 // may only use 0x3 and 0xf while other atomic operations 3458 // may use 0x1 and 0x3. However these limitations are 3459 // verified when we check that dmask matches dst size. 3460 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3461 } 3462 3463 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3464 3465 const unsigned Opc = Inst.getOpcode(); 3466 const MCInstrDesc &Desc = MII.get(Opc); 3467 3468 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3469 return true; 3470 3471 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3472 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3473 3474 // GATHER4 instructions use dmask in a different fashion compared to 3475 // other MIMG instructions. The only useful DMASK values are 3476 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3477 // (red,red,red,red) etc.) The ISA document doesn't mention 3478 // this. 
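// e.g. "image_gather4 ... dmask:0x2" (green) passes this check, while a
// multi-channel mask such as dmask:0x3 is rejected below (illustrative).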
3479 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3480 } 3481 3482 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3483 const unsigned Opc = Inst.getOpcode(); 3484 const MCInstrDesc &Desc = MII.get(Opc); 3485 3486 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3487 return true; 3488 3489 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3490 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3491 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3492 3493 if (!BaseOpcode->MSAA) 3494 return true; 3495 3496 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3497 assert(DimIdx != -1); 3498 3499 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3500 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3501 3502 return DimInfo->MSAA; 3503 } 3504 3505 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3506 { 3507 switch (Opcode) { 3508 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3509 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3510 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3511 return true; 3512 default: 3513 return false; 3514 } 3515 } 3516 3517 // movrels* opcodes should only allow VGPRS as src0. 3518 // This is specified in .td description for vop1/vop3, 3519 // but sdwa is handled differently. See isSDWAOperand. 3520 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3521 const OperandVector &Operands) { 3522 3523 const unsigned Opc = Inst.getOpcode(); 3524 const MCInstrDesc &Desc = MII.get(Opc); 3525 3526 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3527 return true; 3528 3529 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3530 assert(Src0Idx != -1); 3531 3532 SMLoc ErrLoc; 3533 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3534 if (Src0.isReg()) { 3535 auto Reg = mc2PseudoReg(Src0.getReg()); 3536 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3537 if (!isSGPR(Reg, TRI)) 3538 return true; 3539 ErrLoc = getRegLoc(Reg, Operands); 3540 } else { 3541 ErrLoc = getConstLoc(Operands); 3542 } 3543 3544 Error(ErrLoc, "source operand must be a VGPR"); 3545 return false; 3546 } 3547 3548 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3549 const OperandVector &Operands) { 3550 3551 const unsigned Opc = Inst.getOpcode(); 3552 3553 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3554 return true; 3555 3556 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3557 assert(Src0Idx != -1); 3558 3559 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3560 if (!Src0.isReg()) 3561 return true; 3562 3563 auto Reg = mc2PseudoReg(Src0.getReg()); 3564 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3565 if (isSGPR(Reg, TRI)) { 3566 Error(getRegLoc(Reg, Operands), 3567 "source operand must be either a VGPR or an inline constant"); 3568 return false; 3569 } 3570 3571 return true; 3572 } 3573 3574 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3575 switch (Inst.getOpcode()) { 3576 default: 3577 return true; 3578 case V_DIV_SCALE_F32_gfx6_gfx7: 3579 case V_DIV_SCALE_F32_vi: 3580 case V_DIV_SCALE_F32_gfx10: 3581 case V_DIV_SCALE_F64_gfx6_gfx7: 3582 case V_DIV_SCALE_F64_vi: 3583 case V_DIV_SCALE_F64_gfx10: 3584 break; 3585 } 3586 3587 // TODO: Check that src0 = src1 or src2. 
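// The loop below rejects the ABS source modifier on v_div_scale_*; for
// example "v_div_scale_f32 v0, vcc, |v1|, v2, v3" would fail validation
// (illustrative sketch; operand order shown as vdst, sdst, src0, src1, src2).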
3588 3589 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3590 AMDGPU::OpName::src1_modifiers, 3591 AMDGPU::OpName::src2_modifiers}) { 3592 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3593 .getImm() & 3594 SISrcMods::ABS) { 3595 return false; 3596 } 3597 } 3598 3599 return true; 3600 } 3601 3602 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3603 3604 const unsigned Opc = Inst.getOpcode(); 3605 const MCInstrDesc &Desc = MII.get(Opc); 3606 3607 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3608 return true; 3609 3610 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3611 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3612 if (isCI() || isSI()) 3613 return false; 3614 } 3615 3616 return true; 3617 } 3618 3619 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3620 const unsigned Opc = Inst.getOpcode(); 3621 const MCInstrDesc &Desc = MII.get(Opc); 3622 3623 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3624 return true; 3625 3626 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3627 if (DimIdx < 0) 3628 return true; 3629 3630 long Imm = Inst.getOperand(DimIdx).getImm(); 3631 if (Imm < 0 || Imm >= 8) 3632 return false; 3633 3634 return true; 3635 } 3636 3637 static bool IsRevOpcode(const unsigned Opcode) 3638 { 3639 switch (Opcode) { 3640 case AMDGPU::V_SUBREV_F32_e32: 3641 case AMDGPU::V_SUBREV_F32_e64: 3642 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3643 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3644 case AMDGPU::V_SUBREV_F32_e32_vi: 3645 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3646 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3647 case AMDGPU::V_SUBREV_F32_e64_vi: 3648 3649 case AMDGPU::V_SUBREV_CO_U32_e32: 3650 case AMDGPU::V_SUBREV_CO_U32_e64: 3651 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3652 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3653 3654 case AMDGPU::V_SUBBREV_U32_e32: 3655 case AMDGPU::V_SUBBREV_U32_e64: 3656 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3657 case AMDGPU::V_SUBBREV_U32_e32_vi: 3658 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3659 case AMDGPU::V_SUBBREV_U32_e64_vi: 3660 3661 case AMDGPU::V_SUBREV_U32_e32: 3662 case AMDGPU::V_SUBREV_U32_e64: 3663 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3664 case AMDGPU::V_SUBREV_U32_e32_vi: 3665 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3666 case AMDGPU::V_SUBREV_U32_e64_vi: 3667 3668 case AMDGPU::V_SUBREV_F16_e32: 3669 case AMDGPU::V_SUBREV_F16_e64: 3670 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3671 case AMDGPU::V_SUBREV_F16_e32_vi: 3672 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3673 case AMDGPU::V_SUBREV_F16_e64_vi: 3674 3675 case AMDGPU::V_SUBREV_U16_e32: 3676 case AMDGPU::V_SUBREV_U16_e64: 3677 case AMDGPU::V_SUBREV_U16_e32_vi: 3678 case AMDGPU::V_SUBREV_U16_e64_vi: 3679 3680 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3681 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3682 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3683 3684 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3685 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3686 3687 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3688 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3689 3690 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3691 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3692 3693 case AMDGPU::V_LSHRREV_B32_e32: 3694 case AMDGPU::V_LSHRREV_B32_e64: 3695 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3696 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3697 case AMDGPU::V_LSHRREV_B32_e32_vi: 3698 case AMDGPU::V_LSHRREV_B32_e64_vi: 3699 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3700 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3701 3702 case AMDGPU::V_ASHRREV_I32_e32: 3703 case
AMDGPU::V_ASHRREV_I32_e64: 3704 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3705 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3706 case AMDGPU::V_ASHRREV_I32_e32_vi: 3707 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3708 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3709 case AMDGPU::V_ASHRREV_I32_e64_vi: 3710 3711 case AMDGPU::V_LSHLREV_B32_e32: 3712 case AMDGPU::V_LSHLREV_B32_e64: 3713 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3714 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3715 case AMDGPU::V_LSHLREV_B32_e32_vi: 3716 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3717 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3718 case AMDGPU::V_LSHLREV_B32_e64_vi: 3719 3720 case AMDGPU::V_LSHLREV_B16_e32: 3721 case AMDGPU::V_LSHLREV_B16_e64: 3722 case AMDGPU::V_LSHLREV_B16_e32_vi: 3723 case AMDGPU::V_LSHLREV_B16_e64_vi: 3724 case AMDGPU::V_LSHLREV_B16_gfx10: 3725 3726 case AMDGPU::V_LSHRREV_B16_e32: 3727 case AMDGPU::V_LSHRREV_B16_e64: 3728 case AMDGPU::V_LSHRREV_B16_e32_vi: 3729 case AMDGPU::V_LSHRREV_B16_e64_vi: 3730 case AMDGPU::V_LSHRREV_B16_gfx10: 3731 3732 case AMDGPU::V_ASHRREV_I16_e32: 3733 case AMDGPU::V_ASHRREV_I16_e64: 3734 case AMDGPU::V_ASHRREV_I16_e32_vi: 3735 case AMDGPU::V_ASHRREV_I16_e64_vi: 3736 case AMDGPU::V_ASHRREV_I16_gfx10: 3737 3738 case AMDGPU::V_LSHLREV_B64_e64: 3739 case AMDGPU::V_LSHLREV_B64_gfx10: 3740 case AMDGPU::V_LSHLREV_B64_vi: 3741 3742 case AMDGPU::V_LSHRREV_B64_e64: 3743 case AMDGPU::V_LSHRREV_B64_gfx10: 3744 case AMDGPU::V_LSHRREV_B64_vi: 3745 3746 case AMDGPU::V_ASHRREV_I64_e64: 3747 case AMDGPU::V_ASHRREV_I64_gfx10: 3748 case AMDGPU::V_ASHRREV_I64_vi: 3749 3750 case AMDGPU::V_PK_LSHLREV_B16: 3751 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3752 case AMDGPU::V_PK_LSHLREV_B16_vi: 3753 3754 case AMDGPU::V_PK_LSHRREV_B16: 3755 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3756 case AMDGPU::V_PK_LSHRREV_B16_vi: 3757 case AMDGPU::V_PK_ASHRREV_I16: 3758 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3759 case AMDGPU::V_PK_ASHRREV_I16_vi: 3760 return true; 3761 default: 3762 return false; 3763 } 3764 } 3765 3766 Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3767 3768 using namespace SIInstrFlags; 3769 const unsigned Opcode = Inst.getOpcode(); 3770 const MCInstrDesc &Desc = MII.get(Opcode); 3771 3772 // lds_direct register is defined so that it can be used 3773 // with 9-bit operands only. Ignore encodings which do not accept these. 
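// Illustrative usage: 'v_mov_b32 v0, lds_direct' (lds_direct as src0 of a VOP1
// encoding) is accepted, while using lds_direct as src1/src2, with SDWA, or on
// a *rev opcode is diagnosed by the checks below.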
3774 const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA; 3775 if ((Desc.TSFlags & Enc) == 0) 3776 return None; 3777 3778 for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) { 3779 auto SrcIdx = getNamedOperandIdx(Opcode, SrcName); 3780 if (SrcIdx == -1) 3781 break; 3782 const auto &Src = Inst.getOperand(SrcIdx); 3783 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3784 3785 if (isGFX90A()) 3786 return StringRef("lds_direct is not supported on this GPU"); 3787 3788 if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA)) 3789 return StringRef("lds_direct cannot be used with this instruction"); 3790 3791 if (SrcName != OpName::src0) 3792 return StringRef("lds_direct may be used as src0 only"); 3793 } 3794 } 3795 3796 return None; 3797 } 3798 3799 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3800 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3801 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3802 if (Op.isFlatOffset()) 3803 return Op.getStartLoc(); 3804 } 3805 return getLoc(); 3806 } 3807 3808 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3809 const OperandVector &Operands) { 3810 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3811 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3812 return true; 3813 3814 auto Opcode = Inst.getOpcode(); 3815 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3816 assert(OpNum != -1); 3817 3818 const auto &Op = Inst.getOperand(OpNum); 3819 if (!hasFlatOffsets() && Op.getImm() != 0) { 3820 Error(getFlatOffsetLoc(Operands), 3821 "flat offset modifier is not supported on this GPU"); 3822 return false; 3823 } 3824 3825 // For FLAT segment the offset must be positive; 3826 // MSB is ignored and forced to zero. 3827 if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) { 3828 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3829 if (!isIntN(OffsetSize, Op.getImm())) { 3830 Error(getFlatOffsetLoc(Operands), 3831 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3832 return false; 3833 } 3834 } else { 3835 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3836 if (!isUIntN(OffsetSize, Op.getImm())) { 3837 Error(getFlatOffsetLoc(Operands), 3838 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3839 return false; 3840 } 3841 } 3842 3843 return true; 3844 } 3845 3846 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3847 // Start with second operand because SMEM Offset cannot be dst or src0. 
3848 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3849 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3850 if (Op.isSMEMOffset()) 3851 return Op.getStartLoc(); 3852 } 3853 return getLoc(); 3854 } 3855 3856 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3857 const OperandVector &Operands) { 3858 if (isCI() || isSI()) 3859 return true; 3860 3861 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3862 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3863 return true; 3864 3865 auto Opcode = Inst.getOpcode(); 3866 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3867 if (OpNum == -1) 3868 return true; 3869 3870 const auto &Op = Inst.getOperand(OpNum); 3871 if (!Op.isImm()) 3872 return true; 3873 3874 uint64_t Offset = Op.getImm(); 3875 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3876 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3877 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3878 return true; 3879 3880 Error(getSMEMOffsetLoc(Operands), 3881 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3882 "expected a 21-bit signed offset"); 3883 3884 return false; 3885 } 3886 3887 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3888 unsigned Opcode = Inst.getOpcode(); 3889 const MCInstrDesc &Desc = MII.get(Opcode); 3890 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3891 return true; 3892 3893 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3894 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3895 3896 const int OpIndices[] = { Src0Idx, Src1Idx }; 3897 3898 unsigned NumExprs = 0; 3899 unsigned NumLiterals = 0; 3900 uint32_t LiteralValue; 3901 3902 for (int OpIdx : OpIndices) { 3903 if (OpIdx == -1) break; 3904 3905 const MCOperand &MO = Inst.getOperand(OpIdx); 3906 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3907 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3908 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3909 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3910 if (NumLiterals == 0 || LiteralValue != Value) { 3911 LiteralValue = Value; 3912 ++NumLiterals; 3913 } 3914 } else if (MO.isExpr()) { 3915 ++NumExprs; 3916 } 3917 } 3918 } 3919 3920 return NumLiterals + NumExprs <= 1; 3921 } 3922 3923 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3924 const unsigned Opc = Inst.getOpcode(); 3925 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3926 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3927 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3928 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3929 3930 if (OpSel & ~3) 3931 return false; 3932 } 3933 return true; 3934 } 3935 3936 // Check if VCC register matches wavefront size 3937 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3938 auto FB = getFeatureBits(); 3939 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3940 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3941 } 3942 3943 // VOP3 literal is only allowed in GFX10+ and only one can be used 3944 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3945 const OperandVector &Operands) { 3946 unsigned Opcode = Inst.getOpcode(); 3947 const MCInstrDesc &Desc = MII.get(Opcode); 3948 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3949 return true; 3950 3951 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3952 const int Src1Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3953 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3954 3955 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3956 3957 unsigned NumExprs = 0; 3958 unsigned NumLiterals = 0; 3959 uint32_t LiteralValue; 3960 3961 for (int OpIdx : OpIndices) { 3962 if (OpIdx == -1) break; 3963 3964 const MCOperand &MO = Inst.getOperand(OpIdx); 3965 if (!MO.isImm() && !MO.isExpr()) 3966 continue; 3967 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3968 continue; 3969 3970 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3971 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3972 Error(getConstLoc(Operands), 3973 "inline constants are not allowed for this operand"); 3974 return false; 3975 } 3976 3977 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3978 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3979 if (NumLiterals == 0 || LiteralValue != Value) { 3980 LiteralValue = Value; 3981 ++NumLiterals; 3982 } 3983 } else if (MO.isExpr()) { 3984 ++NumExprs; 3985 } 3986 } 3987 NumLiterals += NumExprs; 3988 3989 if (!NumLiterals) 3990 return true; 3991 3992 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3993 Error(getLitLoc(Operands), "literal operands are not supported"); 3994 return false; 3995 } 3996 3997 if (NumLiterals > 1) { 3998 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3999 return false; 4000 } 4001 4002 return true; 4003 } 4004 4005 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4006 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4007 const MCRegisterInfo *MRI) { 4008 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4009 if (OpIdx < 0) 4010 return -1; 4011 4012 const MCOperand &Op = Inst.getOperand(OpIdx); 4013 if (!Op.isReg()) 4014 return -1; 4015 4016 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4017 auto Reg = Sub ? Sub : Op.getReg(); 4018 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4019 return AGRP32.contains(Reg) ? 1 : 0; 4020 } 4021 4022 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4023 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4024 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4025 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4026 SIInstrFlags::DS)) == 0) 4027 return true; 4028 4029 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? 
AMDGPU::OpName::data0 4030 : AMDGPU::OpName::vdata; 4031 4032 const MCRegisterInfo *MRI = getMRI(); 4033 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4034 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4035 4036 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4037 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4038 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4039 return false; 4040 } 4041 4042 auto FB = getFeatureBits(); 4043 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4044 if (DataAreg < 0 || DstAreg < 0) 4045 return true; 4046 return DstAreg == DataAreg; 4047 } 4048 4049 return DstAreg < 1 && DataAreg < 1; 4050 } 4051 4052 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4053 auto FB = getFeatureBits(); 4054 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4055 return true; 4056 4057 const MCRegisterInfo *MRI = getMRI(); 4058 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4059 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4060 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4061 const MCOperand &Op = Inst.getOperand(I); 4062 if (!Op.isReg()) 4063 continue; 4064 4065 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4066 if (!Sub) 4067 continue; 4068 4069 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4070 return false; 4071 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4072 return false; 4073 } 4074 4075 return true; 4076 } 4077 4078 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4079 const OperandVector &Operands, 4080 const SMLoc &IDLoc) { 4081 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4082 AMDGPU::OpName::cpol); 4083 if (CPolPos == -1) 4084 return true; 4085 4086 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4087 4088 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4089 if ((TSFlags & (SIInstrFlags::SMRD)) && 4090 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4091 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4092 return false; 4093 } 4094 4095 if (isGFX90A() && (CPol & CPol::SCC)) { 4096 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4097 StringRef CStr(S.getPointer()); 4098 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4099 Error(S, "scc is not supported on this GPU"); 4100 return false; 4101 } 4102 4103 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4104 return true; 4105 4106 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4107 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4108 Error(IDLoc, "instruction must use glc"); 4109 return false; 4110 } 4111 } else { 4112 if (CPol & CPol::GLC) { 4113 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4114 StringRef CStr(S.getPointer()); 4115 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4116 Error(S, "instruction must not use glc"); 4117 return false; 4118 } 4119 } 4120 4121 return true; 4122 } 4123 4124 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4125 const SMLoc &IDLoc, 4126 const OperandVector &Operands) { 4127 if (auto ErrMsg = validateLdsDirect(Inst)) { 4128 Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg); 4129 return false; 4130 } 4131 if (!validateSOPLiteral(Inst)) { 4132 Error(getLitLoc(Operands), 4133 "only one literal operand is allowed"); 4134 return false; 4135 } 4136 if (!validateVOP3Literal(Inst, Operands)) { 4137 return false; 4138 } 4139 if (!validateConstantBusLimitations(Inst, Operands)) { 4140 return false; 
4141 } 4142 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4143 return false; 4144 } 4145 if (!validateIntClampSupported(Inst)) { 4146 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4147 "integer clamping is not supported on this GPU"); 4148 return false; 4149 } 4150 if (!validateOpSel(Inst)) { 4151 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4152 "invalid op_sel operand"); 4153 return false; 4154 } 4155 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4156 if (!validateMIMGD16(Inst)) { 4157 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4158 "d16 modifier is not supported on this GPU"); 4159 return false; 4160 } 4161 if (!validateMIMGDim(Inst)) { 4162 Error(IDLoc, "dim modifier is required on this GPU"); 4163 return false; 4164 } 4165 if (!validateMIMGMSAA(Inst)) { 4166 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4167 "invalid dim; must be MSAA type"); 4168 return false; 4169 } 4170 if (!validateMIMGDataSize(Inst)) { 4171 Error(IDLoc, 4172 "image data size does not match dmask and tfe"); 4173 return false; 4174 } 4175 if (!validateMIMGAddrSize(Inst)) { 4176 Error(IDLoc, 4177 "image address size does not match dim and a16"); 4178 return false; 4179 } 4180 if (!validateMIMGAtomicDMask(Inst)) { 4181 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4182 "invalid atomic image dmask"); 4183 return false; 4184 } 4185 if (!validateMIMGGatherDMask(Inst)) { 4186 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4187 "invalid image_gather dmask: only one bit must be set"); 4188 return false; 4189 } 4190 if (!validateMovrels(Inst, Operands)) { 4191 return false; 4192 } 4193 if (!validateFlatOffset(Inst, Operands)) { 4194 return false; 4195 } 4196 if (!validateSMEMOffset(Inst, Operands)) { 4197 return false; 4198 } 4199 if (!validateMAIAccWrite(Inst, Operands)) { 4200 return false; 4201 } 4202 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4203 return false; 4204 } 4205 4206 if (!validateAGPRLdSt(Inst)) { 4207 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4208 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4209 : "invalid register class: agpr loads and stores not supported on this GPU" 4210 ); 4211 return false; 4212 } 4213 if (!validateVGPRAlign(Inst)) { 4214 Error(IDLoc, 4215 "invalid register class: vgpr tuples must be 64 bit aligned"); 4216 return false; 4217 } 4218 4219 if (!validateDivScale(Inst)) { 4220 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4221 return false; 4222 } 4223 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4224 return false; 4225 } 4226 4227 return true; 4228 } 4229 4230 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4231 const FeatureBitset &FBS, 4232 unsigned VariantID = 0); 4233 4234 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4235 const FeatureBitset &AvailableFeatures, 4236 unsigned VariantID); 4237 4238 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4239 const FeatureBitset &FBS) { 4240 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4241 } 4242 4243 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4244 const FeatureBitset &FBS, 4245 ArrayRef<unsigned> Variants) { 4246 for (auto Variant : Variants) { 4247 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4248 return true; 4249 } 4250 4251 return false; 4252 } 4253 4254 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4255 const SMLoc &IDLoc) { 4256 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4257 4258 // Check if requested instruction variant is supported. 4259 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4260 return false; 4261 4262 // This instruction is not supported. 4263 // Clear any other pending errors because they are no longer relevant. 4264 getParser().clearPendingErrors(); 4265 4266 // Requested instruction variant is not supported. 4267 // Check if any other variants are supported. 4268 StringRef VariantName = getMatchedVariantName(); 4269 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4270 return Error(IDLoc, 4271 Twine(VariantName, 4272 " variant of this instruction is not supported")); 4273 } 4274 4275 // Finally check if this instruction is supported on any other GPU. 4276 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4277 return Error(IDLoc, "instruction not supported on this GPU"); 4278 } 4279 4280 // Instruction not supported on any GPU. Probably a typo. 4281 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4282 return Error(IDLoc, "invalid instruction" + Suggestion); 4283 } 4284 4285 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4286 OperandVector &Operands, 4287 MCStreamer &Out, 4288 uint64_t &ErrorInfo, 4289 bool MatchingInlineAsm) { 4290 MCInst Inst; 4291 unsigned Result = Match_Success; 4292 for (auto Variant : getMatchedVariants()) { 4293 uint64_t EI; 4294 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4295 Variant); 4296 // We order match statuses from least to most specific. 
We use most specific 4297 // status as resulting 4298 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4299 if ((R == Match_Success) || 4300 (R == Match_PreferE32) || 4301 (R == Match_MissingFeature && Result != Match_PreferE32) || 4302 (R == Match_InvalidOperand && Result != Match_MissingFeature 4303 && Result != Match_PreferE32) || 4304 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4305 && Result != Match_MissingFeature 4306 && Result != Match_PreferE32)) { 4307 Result = R; 4308 ErrorInfo = EI; 4309 } 4310 if (R == Match_Success) 4311 break; 4312 } 4313 4314 if (Result == Match_Success) { 4315 if (!validateInstruction(Inst, IDLoc, Operands)) { 4316 return true; 4317 } 4318 Inst.setLoc(IDLoc); 4319 Out.emitInstruction(Inst, getSTI()); 4320 return false; 4321 } 4322 4323 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4324 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4325 return true; 4326 } 4327 4328 switch (Result) { 4329 default: break; 4330 case Match_MissingFeature: 4331 // It has been verified that the specified instruction 4332 // mnemonic is valid. A match was found but it requires 4333 // features which are not supported on this GPU. 4334 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4335 4336 case Match_InvalidOperand: { 4337 SMLoc ErrorLoc = IDLoc; 4338 if (ErrorInfo != ~0ULL) { 4339 if (ErrorInfo >= Operands.size()) { 4340 return Error(IDLoc, "too few operands for instruction"); 4341 } 4342 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4343 if (ErrorLoc == SMLoc()) 4344 ErrorLoc = IDLoc; 4345 } 4346 return Error(ErrorLoc, "invalid operand for instruction"); 4347 } 4348 4349 case Match_PreferE32: 4350 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4351 "should be encoded as e32"); 4352 case Match_MnemonicFail: 4353 llvm_unreachable("Invalid instructions should have been handled already"); 4354 } 4355 llvm_unreachable("Implement any new match types added!"); 4356 } 4357 4358 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4359 int64_t Tmp = -1; 4360 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4361 return true; 4362 } 4363 if (getParser().parseAbsoluteExpression(Tmp)) { 4364 return true; 4365 } 4366 Ret = static_cast<uint32_t>(Tmp); 4367 return false; 4368 } 4369 4370 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4371 uint32_t &Minor) { 4372 if (ParseAsAbsoluteExpression(Major)) 4373 return TokError("invalid major version"); 4374 4375 if (!trySkipToken(AsmToken::Comma)) 4376 return TokError("minor version number required, comma expected"); 4377 4378 if (ParseAsAbsoluteExpression(Minor)) 4379 return TokError("invalid minor version"); 4380 4381 return false; 4382 } 4383 4384 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4385 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4386 return TokError("directive only supported for amdgcn architecture"); 4387 4388 std::string TargetIDDirective; 4389 SMLoc TargetStart = getTok().getLoc(); 4390 if (getParser().parseEscapedString(TargetIDDirective)) 4391 return true; 4392 4393 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4394 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4395 return getParser().Error(TargetRange.Start, 4396 (Twine(".amdgcn_target directive's target id ") + 4397 Twine(TargetIDDirective) + 4398 Twine(" does not match the specified target id ") + 4399 
Twine(getTargetStreamer().getTargetID()->toString())).str()); 4400 4401 return false; 4402 } 4403 4404 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4405 return Error(Range.Start, "value out of range", Range); 4406 } 4407 4408 bool AMDGPUAsmParser::calculateGPRBlocks( 4409 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4410 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4411 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4412 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4413 // TODO(scott.linder): These calculations are duplicated from 4414 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4415 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4416 4417 unsigned NumVGPRs = NextFreeVGPR; 4418 unsigned NumSGPRs = NextFreeSGPR; 4419 4420 if (Version.Major >= 10) 4421 NumSGPRs = 0; 4422 else { 4423 unsigned MaxAddressableNumSGPRs = 4424 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4425 4426 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4427 NumSGPRs > MaxAddressableNumSGPRs) 4428 return OutOfRangeError(SGPRRange); 4429 4430 NumSGPRs += 4431 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4432 4433 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4434 NumSGPRs > MaxAddressableNumSGPRs) 4435 return OutOfRangeError(SGPRRange); 4436 4437 if (Features.test(FeatureSGPRInitBug)) 4438 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4439 } 4440 4441 VGPRBlocks = 4442 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4443 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4444 4445 return false; 4446 } 4447 4448 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4449 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4450 return TokError("directive only supported for amdgcn architecture"); 4451 4452 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4453 return TokError("directive only supported for amdhsa OS"); 4454 4455 StringRef KernelName; 4456 if (getParser().parseIdentifier(KernelName)) 4457 return true; 4458 4459 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4460 4461 StringSet<> Seen; 4462 4463 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4464 4465 SMRange VGPRRange; 4466 uint64_t NextFreeVGPR = 0; 4467 uint64_t AccumOffset = 0; 4468 SMRange SGPRRange; 4469 uint64_t NextFreeSGPR = 0; 4470 unsigned UserSGPRCount = 0; 4471 bool ReserveVCC = true; 4472 bool ReserveFlatScr = true; 4473 Optional<bool> EnableWavefrontSize32; 4474 4475 while (true) { 4476 while (trySkipToken(AsmToken::EndOfStatement)); 4477 4478 StringRef ID; 4479 SMRange IDRange = getTok().getLocRange(); 4480 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4481 return true; 4482 4483 if (ID == ".end_amdhsa_kernel") 4484 break; 4485 4486 if (Seen.find(ID) != Seen.end()) 4487 return TokError(".amdhsa_ directives cannot be repeated"); 4488 Seen.insert(ID); 4489 4490 SMLoc ValStart = getLoc(); 4491 int64_t IVal; 4492 if (getParser().parseAbsoluteExpression(IVal)) 4493 return true; 4494 SMLoc ValEnd = getLoc(); 4495 SMRange ValRange = SMRange(ValStart, ValEnd); 4496 4497 if (IVal < 0) 4498 return OutOfRangeError(ValRange); 4499 4500 uint64_t Val = IVal; 4501 4502 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4503 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4504 return OutOfRangeError(RANGE); \ 4505 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4506 4507 if (ID == ".amdhsa_group_segment_fixed_size") { 
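// e.g. an '.amdhsa_kernel' body may contain '.amdhsa_group_segment_fixed_size 4096'
// to request 4096 bytes of group (LDS) storage; the value is range-checked against
// the width of the descriptor field just below. (Illustrative value.)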
4508 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4509 return OutOfRangeError(ValRange); 4510 KD.group_segment_fixed_size = Val; 4511 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4512 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4513 return OutOfRangeError(ValRange); 4514 KD.private_segment_fixed_size = Val; 4515 } else if (ID == ".amdhsa_kernarg_size") { 4516 if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val)) 4517 return OutOfRangeError(ValRange); 4518 KD.kernarg_size = Val; 4519 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4520 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4521 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4522 Val, ValRange); 4523 if (Val) 4524 UserSGPRCount += 4; 4525 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4526 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4527 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4528 ValRange); 4529 if (Val) 4530 UserSGPRCount += 2; 4531 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4532 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4533 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4534 ValRange); 4535 if (Val) 4536 UserSGPRCount += 2; 4537 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4538 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4539 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4540 Val, ValRange); 4541 if (Val) 4542 UserSGPRCount += 2; 4543 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4544 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4545 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4546 ValRange); 4547 if (Val) 4548 UserSGPRCount += 2; 4549 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4550 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4551 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4552 ValRange); 4553 if (Val) 4554 UserSGPRCount += 2; 4555 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4556 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4557 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4558 Val, ValRange); 4559 if (Val) 4560 UserSGPRCount += 1; 4561 } else if (ID == ".amdhsa_wavefront_size32") { 4562 if (IVersion.Major < 10) 4563 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4564 EnableWavefrontSize32 = Val; 4565 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4566 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4567 Val, ValRange); 4568 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4569 PARSE_BITS_ENTRY( 4570 KD.compute_pgm_rsrc2, 4571 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4572 ValRange); 4573 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4574 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4575 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4576 ValRange); 4577 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4578 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4579 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4580 ValRange); 4581 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4582 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4583 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4584 ValRange); 4585 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4586 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4587 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4588 ValRange); 4589 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4590 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4591 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4592 ValRange); 4593 } else if (ID == 
".amdhsa_next_free_vgpr") { 4594 VGPRRange = ValRange; 4595 NextFreeVGPR = Val; 4596 } else if (ID == ".amdhsa_next_free_sgpr") { 4597 SGPRRange = ValRange; 4598 NextFreeSGPR = Val; 4599 } else if (ID == ".amdhsa_accum_offset") { 4600 if (!isGFX90A()) 4601 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4602 AccumOffset = Val; 4603 } else if (ID == ".amdhsa_reserve_vcc") { 4604 if (!isUInt<1>(Val)) 4605 return OutOfRangeError(ValRange); 4606 ReserveVCC = Val; 4607 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4608 if (IVersion.Major < 7) 4609 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4610 if (!isUInt<1>(Val)) 4611 return OutOfRangeError(ValRange); 4612 ReserveFlatScr = Val; 4613 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4614 if (IVersion.Major < 8) 4615 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4616 if (!isUInt<1>(Val)) 4617 return OutOfRangeError(ValRange); 4618 if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny()) 4619 return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id", 4620 IDRange); 4621 } else if (ID == ".amdhsa_float_round_mode_32") { 4622 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4623 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4624 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4626 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4627 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4628 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4629 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4630 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4631 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4632 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4633 ValRange); 4634 } else if (ID == ".amdhsa_dx10_clamp") { 4635 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4636 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4637 } else if (ID == ".amdhsa_ieee_mode") { 4638 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4639 Val, ValRange); 4640 } else if (ID == ".amdhsa_fp16_overflow") { 4641 if (IVersion.Major < 9) 4642 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4643 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4644 ValRange); 4645 } else if (ID == ".amdhsa_tg_split") { 4646 if (!isGFX90A()) 4647 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4648 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4649 ValRange); 4650 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4651 if (IVersion.Major < 10) 4652 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4653 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4654 ValRange); 4655 } else if (ID == ".amdhsa_memory_ordered") { 4656 if (IVersion.Major < 10) 4657 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4658 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4659 ValRange); 4660 } else if (ID == ".amdhsa_forward_progress") { 4661 if (IVersion.Major < 10) 4662 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4663 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4664 ValRange); 4665 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4666 PARSE_BITS_ENTRY( 4667 KD.compute_pgm_rsrc2, 4668 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4669 ValRange); 4670 } else if (ID == 
".amdhsa_exception_fp_denorm_src") { 4671 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4672 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4673 Val, ValRange); 4674 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4675 PARSE_BITS_ENTRY( 4676 KD.compute_pgm_rsrc2, 4677 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4678 ValRange); 4679 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4680 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4681 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4682 Val, ValRange); 4683 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4684 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4685 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4686 Val, ValRange); 4687 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4688 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4689 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4690 Val, ValRange); 4691 } else if (ID == ".amdhsa_exception_int_div_zero") { 4692 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4693 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4694 Val, ValRange); 4695 } else { 4696 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4697 } 4698 4699 #undef PARSE_BITS_ENTRY 4700 } 4701 4702 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4703 return TokError(".amdhsa_next_free_vgpr directive is required"); 4704 4705 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4706 return TokError(".amdhsa_next_free_sgpr directive is required"); 4707 4708 unsigned VGPRBlocks; 4709 unsigned SGPRBlocks; 4710 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4711 getTargetStreamer().getTargetID()->isXnackOnOrAny(), 4712 EnableWavefrontSize32, NextFreeVGPR, 4713 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4714 SGPRBlocks)) 4715 return true; 4716 4717 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4718 VGPRBlocks)) 4719 return OutOfRangeError(VGPRRange); 4720 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4721 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4722 4723 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4724 SGPRBlocks)) 4725 return OutOfRangeError(SGPRRange); 4726 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4727 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4728 SGPRBlocks); 4729 4730 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4731 return TokError("too many user SGPRs enabled"); 4732 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4733 UserSGPRCount); 4734 4735 if (isGFX90A()) { 4736 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4737 return TokError(".amdhsa_accum_offset directive is required"); 4738 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4739 return TokError("accum_offset should be in range [4..256] in " 4740 "increments of 4"); 4741 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4742 return TokError("accum_offset exceeds total VGPR allocation"); 4743 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4744 (AccumOffset / 4 - 1)); 4745 } 4746 4747 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4748 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4749 ReserveFlatScr); 4750 return false; 4751 } 4752 4753 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4754 uint32_t Major; 4755 uint32_t Minor; 4756 4757 if (ParseDirectiveMajorMinor(Major, Minor)) 4758 return true; 4759 4760 
getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4761 return false; 4762 } 4763 4764 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4765 uint32_t Major; 4766 uint32_t Minor; 4767 uint32_t Stepping; 4768 StringRef VendorName; 4769 StringRef ArchName; 4770 4771 // If this directive has no arguments, then use the ISA version for the 4772 // targeted GPU. 4773 if (isToken(AsmToken::EndOfStatement)) { 4774 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4775 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor, 4776 ISA.Stepping, 4777 "AMD", "AMDGPU"); 4778 return false; 4779 } 4780 4781 if (ParseDirectiveMajorMinor(Major, Minor)) 4782 return true; 4783 4784 if (!trySkipToken(AsmToken::Comma)) 4785 return TokError("stepping version number required, comma expected"); 4786 4787 if (ParseAsAbsoluteExpression(Stepping)) 4788 return TokError("invalid stepping version"); 4789 4790 if (!trySkipToken(AsmToken::Comma)) 4791 return TokError("vendor name required, comma expected"); 4792 4793 if (!parseString(VendorName, "invalid vendor name")) 4794 return true; 4795 4796 if (!trySkipToken(AsmToken::Comma)) 4797 return TokError("arch name required, comma expected"); 4798 4799 if (!parseString(ArchName, "invalid arch name")) 4800 return true; 4801 4802 getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping, 4803 VendorName, ArchName); 4804 return false; 4805 } 4806 4807 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4808 amd_kernel_code_t &Header) { 4809 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4810 // assembly for backwards compatibility. 4811 if (ID == "max_scratch_backing_memory_byte_size") { 4812 Parser.eatToEndOfStatement(); 4813 return false; 4814 } 4815 4816 SmallString<40> ErrStr; 4817 raw_svector_ostream Err(ErrStr); 4818 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4819 return TokError(Err.str()); 4820 } 4821 Lex(); 4822 4823 if (ID == "enable_wavefront_size32") { 4824 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4825 if (!isGFX10Plus()) 4826 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4827 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4828 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4829 } else { 4830 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4831 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4832 } 4833 } 4834 4835 if (ID == "wavefront_size") { 4836 if (Header.wavefront_size == 5) { 4837 if (!isGFX10Plus()) 4838 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4839 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4840 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4841 } else if (Header.wavefront_size == 6) { 4842 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4843 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4844 } 4845 } 4846 4847 if (ID == "enable_wgp_mode") { 4848 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4849 !isGFX10Plus()) 4850 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4851 } 4852 4853 if (ID == "enable_mem_ordered") { 4854 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4855 !isGFX10Plus()) 4856 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4857 } 4858 4859 if (ID == "enable_fwd_progress") { 4860 if 
(G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4861 !isGFX10Plus()) 4862 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4863 } 4864 4865 return false; 4866 } 4867 4868 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4869 amd_kernel_code_t Header; 4870 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4871 4872 while (true) { 4873 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4874 // will set the current token to EndOfStatement. 4875 while(trySkipToken(AsmToken::EndOfStatement)); 4876 4877 StringRef ID; 4878 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4879 return true; 4880 4881 if (ID == ".end_amd_kernel_code_t") 4882 break; 4883 4884 if (ParseAMDKernelCodeTValue(ID, Header)) 4885 return true; 4886 } 4887 4888 getTargetStreamer().EmitAMDKernelCodeT(Header); 4889 4890 return false; 4891 } 4892 4893 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4894 StringRef KernelName; 4895 if (!parseId(KernelName, "expected symbol name")) 4896 return true; 4897 4898 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4899 ELF::STT_AMDGPU_HSA_KERNEL); 4900 4901 KernelScope.initialize(getContext()); 4902 return false; 4903 } 4904 4905 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4906 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4907 return Error(getLoc(), 4908 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4909 "architectures"); 4910 } 4911 4912 auto TargetIDDirective = getLexer().getTok().getStringContents(); 4913 if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective) 4914 return Error(getParser().getTok().getLoc(), "target id must match options"); 4915 4916 getTargetStreamer().EmitISAVersion(); 4917 Lex(); 4918 4919 return false; 4920 } 4921 4922 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4923 const char *AssemblerDirectiveBegin; 4924 const char *AssemblerDirectiveEnd; 4925 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4926 isHsaAbiVersion3Or4(&getSTI()) 4927 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4928 HSAMD::V3::AssemblerDirectiveEnd) 4929 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4930 HSAMD::AssemblerDirectiveEnd); 4931 4932 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4933 return Error(getLoc(), 4934 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4935 "not available on non-amdhsa OSes")).str()); 4936 } 4937 4938 std::string HSAMetadataString; 4939 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4940 HSAMetadataString)) 4941 return true; 4942 4943 if (isHsaAbiVersion3Or4(&getSTI())) { 4944 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4945 return Error(getLoc(), "invalid HSA metadata"); 4946 } else { 4947 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4948 return Error(getLoc(), "invalid HSA metadata"); 4949 } 4950 4951 return false; 4952 } 4953 4954 /// Common code to parse out a block of text (typically YAML) between start and 4955 /// end directives. 
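/// For instance, ParseDirectiveHSAMetadata above hands the HSA metadata block to
/// this helper, which returns everything up to the matching end directive as a
/// single string (directive spellings per the AssemblerDirective* constants).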
4956 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4957 const char *AssemblerDirectiveEnd, 4958 std::string &CollectString) { 4959 4960 raw_string_ostream CollectStream(CollectString); 4961 4962 getLexer().setSkipSpace(false); 4963 4964 bool FoundEnd = false; 4965 while (!isToken(AsmToken::Eof)) { 4966 while (isToken(AsmToken::Space)) { 4967 CollectStream << getTokenStr(); 4968 Lex(); 4969 } 4970 4971 if (trySkipId(AssemblerDirectiveEnd)) { 4972 FoundEnd = true; 4973 break; 4974 } 4975 4976 CollectStream << Parser.parseStringToEndOfStatement() 4977 << getContext().getAsmInfo()->getSeparatorString(); 4978 4979 Parser.eatToEndOfStatement(); 4980 } 4981 4982 getLexer().setSkipSpace(true); 4983 4984 if (isToken(AsmToken::Eof) && !FoundEnd) { 4985 return TokError(Twine("expected directive ") + 4986 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4987 } 4988 4989 CollectStream.flush(); 4990 return false; 4991 } 4992 4993 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4994 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4995 std::string String; 4996 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4997 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4998 return true; 4999 5000 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5001 if (!PALMetadata->setFromString(String)) 5002 return Error(getLoc(), "invalid PAL metadata"); 5003 return false; 5004 } 5005 5006 /// Parse the assembler directive for old linear-format PAL metadata. 5007 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5008 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5009 return Error(getLoc(), 5010 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5011 "not available on non-amdpal OSes")).str()); 5012 } 5013 5014 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5015 PALMetadata->setLegacy(); 5016 for (;;) { 5017 uint32_t Key, Value; 5018 if (ParseAsAbsoluteExpression(Key)) { 5019 return TokError(Twine("invalid value in ") + 5020 Twine(PALMD::AssemblerDirective)); 5021 } 5022 if (!trySkipToken(AsmToken::Comma)) { 5023 return TokError(Twine("expected an even number of values in ") + 5024 Twine(PALMD::AssemblerDirective)); 5025 } 5026 if (ParseAsAbsoluteExpression(Value)) { 5027 return TokError(Twine("invalid value in ") + 5028 Twine(PALMD::AssemblerDirective)); 5029 } 5030 PALMetadata->setRegister(Key, Value); 5031 if (!trySkipToken(AsmToken::Comma)) 5032 break; 5033 } 5034 return false; 5035 } 5036 5037 /// ParseDirectiveAMDGPULDS 5038 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5039 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5040 if (getParser().checkForValidSection()) 5041 return true; 5042 5043 StringRef Name; 5044 SMLoc NameLoc = getLoc(); 5045 if (getParser().parseIdentifier(Name)) 5046 return TokError("expected identifier in directive"); 5047 5048 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5049 if (parseToken(AsmToken::Comma, "expected ','")) 5050 return true; 5051 5052 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5053 5054 int64_t Size; 5055 SMLoc SizeLoc = getLoc(); 5056 if (getParser().parseAbsoluteExpression(Size)) 5057 return true; 5058 if (Size < 0) 5059 return Error(SizeLoc, "size must be non-negative"); 5060 if (Size > LocalMemorySize) 5061 return Error(SizeLoc, "size is too large"); 5062 5063 int64_t Alignment = 4; 5064 if (trySkipToken(AsmToken::Comma)) { 5065 SMLoc AlignLoc = getLoc(); 5066 if 
(getParser().parseAbsoluteExpression(Alignment)) 5067 return true; 5068 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5069 return Error(AlignLoc, "alignment must be a power of two"); 5070 5071 // Alignment larger than the size of LDS is possible in theory, as long 5072 // as the linker manages to place to symbol at address 0, but we do want 5073 // to make sure the alignment fits nicely into a 32-bit integer. 5074 if (Alignment >= 1u << 31) 5075 return Error(AlignLoc, "alignment is too large"); 5076 } 5077 5078 if (parseToken(AsmToken::EndOfStatement, 5079 "unexpected token in '.amdgpu_lds' directive")) 5080 return true; 5081 5082 Symbol->redefineIfPossible(); 5083 if (!Symbol->isUndefined()) 5084 return Error(NameLoc, "invalid symbol redefinition"); 5085 5086 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5087 return false; 5088 } 5089 5090 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5091 StringRef IDVal = DirectiveID.getString(); 5092 5093 if (isHsaAbiVersion3Or4(&getSTI())) { 5094 if (IDVal == ".amdhsa_kernel") 5095 return ParseDirectiveAMDHSAKernel(); 5096 5097 // TODO: Restructure/combine with PAL metadata directive. 5098 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5099 return ParseDirectiveHSAMetadata(); 5100 } else { 5101 if (IDVal == ".hsa_code_object_version") 5102 return ParseDirectiveHSACodeObjectVersion(); 5103 5104 if (IDVal == ".hsa_code_object_isa") 5105 return ParseDirectiveHSACodeObjectISA(); 5106 5107 if (IDVal == ".amd_kernel_code_t") 5108 return ParseDirectiveAMDKernelCodeT(); 5109 5110 if (IDVal == ".amdgpu_hsa_kernel") 5111 return ParseDirectiveAMDGPUHsaKernel(); 5112 5113 if (IDVal == ".amd_amdgpu_isa") 5114 return ParseDirectiveISAVersion(); 5115 5116 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5117 return ParseDirectiveHSAMetadata(); 5118 } 5119 5120 if (IDVal == ".amdgcn_target") 5121 return ParseDirectiveAMDGCNTarget(); 5122 5123 if (IDVal == ".amdgpu_lds") 5124 return ParseDirectiveAMDGPULDS(); 5125 5126 if (IDVal == PALMD::AssemblerDirectiveBegin) 5127 return ParseDirectivePALMetadataBegin(); 5128 5129 if (IDVal == PALMD::AssemblerDirective) 5130 return ParseDirectivePALMetadata(); 5131 5132 return true; 5133 } 5134 5135 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5136 unsigned RegNo) { 5137 5138 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5139 R.isValid(); ++R) { 5140 if (*R == RegNo) 5141 return isGFX9Plus(); 5142 } 5143 5144 // GFX10 has 2 more SGPRs 104 and 105. 5145 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5146 R.isValid(); ++R) { 5147 if (*R == RegNo) 5148 return hasSGPR104_SGPR105(); 5149 } 5150 5151 switch (RegNo) { 5152 case AMDGPU::SRC_SHARED_BASE: 5153 case AMDGPU::SRC_SHARED_LIMIT: 5154 case AMDGPU::SRC_PRIVATE_BASE: 5155 case AMDGPU::SRC_PRIVATE_LIMIT: 5156 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5157 return isGFX9Plus(); 5158 case AMDGPU::TBA: 5159 case AMDGPU::TBA_LO: 5160 case AMDGPU::TBA_HI: 5161 case AMDGPU::TMA: 5162 case AMDGPU::TMA_LO: 5163 case AMDGPU::TMA_HI: 5164 return !isGFX9Plus(); 5165 case AMDGPU::XNACK_MASK: 5166 case AMDGPU::XNACK_MASK_LO: 5167 case AMDGPU::XNACK_MASK_HI: 5168 return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported(); 5169 case AMDGPU::SGPR_NULL: 5170 return isGFX10Plus(); 5171 default: 5172 break; 5173 } 5174 5175 if (isCI()) 5176 return true; 5177 5178 if (isSI() || isGFX10Plus()) { 5179 // No flat_scr on SI. 
5180 // On GFX10 flat scratch is not a valid register operand and can only be 5181 // accessed with s_setreg/s_getreg. 5182 switch (RegNo) { 5183 case AMDGPU::FLAT_SCR: 5184 case AMDGPU::FLAT_SCR_LO: 5185 case AMDGPU::FLAT_SCR_HI: 5186 return false; 5187 default: 5188 return true; 5189 } 5190 } 5191 5192 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5193 // SI/CI have. 5194 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5195 R.isValid(); ++R) { 5196 if (*R == RegNo) 5197 return hasSGPR102_SGPR103(); 5198 } 5199 5200 return true; 5201 } 5202 5203 OperandMatchResultTy 5204 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5205 OperandMode Mode) { 5206 // Try to parse with a custom parser 5207 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5208 5209 // If we successfully parsed the operand or if there as an error parsing, 5210 // we are done. 5211 // 5212 // If we are parsing after we reach EndOfStatement then this means we 5213 // are appending default values to the Operands list. This is only done 5214 // by custom parser, so we shouldn't continue on to the generic parsing. 5215 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5216 isToken(AsmToken::EndOfStatement)) 5217 return ResTy; 5218 5219 SMLoc RBraceLoc; 5220 SMLoc LBraceLoc = getLoc(); 5221 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5222 unsigned Prefix = Operands.size(); 5223 5224 for (;;) { 5225 auto Loc = getLoc(); 5226 ResTy = parseReg(Operands); 5227 if (ResTy == MatchOperand_NoMatch) 5228 Error(Loc, "expected a register"); 5229 if (ResTy != MatchOperand_Success) 5230 return MatchOperand_ParseFail; 5231 5232 RBraceLoc = getLoc(); 5233 if (trySkipToken(AsmToken::RBrac)) 5234 break; 5235 5236 if (!skipToken(AsmToken::Comma, 5237 "expected a comma or a closing square bracket")) { 5238 return MatchOperand_ParseFail; 5239 } 5240 } 5241 5242 if (Operands.size() - Prefix > 1) { 5243 Operands.insert(Operands.begin() + Prefix, 5244 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5245 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5246 } 5247 5248 return MatchOperand_Success; 5249 } 5250 5251 return parseRegOrImm(Operands); 5252 } 5253 5254 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5255 // Clear any forced encodings from the previous instruction. 
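// For example, 'v_add_f32_e64' is matched as 'v_add_f32' with a forced 64-bit
// encoding; the '_e32', '_dpp' and '_sdwa' suffixes are handled the same way below.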
5256 setForcedEncodingSize(0); 5257 setForcedDPP(false); 5258 setForcedSDWA(false); 5259 5260 if (Name.endswith("_e64")) { 5261 setForcedEncodingSize(64); 5262 return Name.substr(0, Name.size() - 4); 5263 } else if (Name.endswith("_e32")) { 5264 setForcedEncodingSize(32); 5265 return Name.substr(0, Name.size() - 4); 5266 } else if (Name.endswith("_dpp")) { 5267 setForcedDPP(true); 5268 return Name.substr(0, Name.size() - 4); 5269 } else if (Name.endswith("_sdwa")) { 5270 setForcedSDWA(true); 5271 return Name.substr(0, Name.size() - 5); 5272 } 5273 return Name; 5274 } 5275 5276 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5277 StringRef Name, 5278 SMLoc NameLoc, OperandVector &Operands) { 5279 // Add the instruction mnemonic 5280 Name = parseMnemonicSuffix(Name); 5281 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5282 5283 bool IsMIMG = Name.startswith("image_"); 5284 5285 while (!trySkipToken(AsmToken::EndOfStatement)) { 5286 OperandMode Mode = OperandMode_Default; 5287 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5288 Mode = OperandMode_NSA; 5289 CPolSeen = 0; 5290 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5291 5292 if (Res != MatchOperand_Success) { 5293 checkUnsupportedInstruction(Name, NameLoc); 5294 if (!Parser.hasPendingError()) { 5295 // FIXME: use real operand location rather than the current location. 5296 StringRef Msg = 5297 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5298 "not a valid operand."; 5299 Error(getLoc(), Msg); 5300 } 5301 while (!trySkipToken(AsmToken::EndOfStatement)) { 5302 lex(); 5303 } 5304 return true; 5305 } 5306 5307 // Eat the comma or space if there is one. 5308 trySkipToken(AsmToken::Comma); 5309 } 5310 5311 return false; 5312 } 5313 5314 //===----------------------------------------------------------------------===// 5315 // Utility functions 5316 //===----------------------------------------------------------------------===// 5317 5318 OperandMatchResultTy 5319 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5320 5321 if (!trySkipId(Prefix, AsmToken::Colon)) 5322 return MatchOperand_NoMatch; 5323 5324 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5325 } 5326 5327 OperandMatchResultTy 5328 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5329 AMDGPUOperand::ImmTy ImmTy, 5330 bool (*ConvertResult)(int64_t&)) { 5331 SMLoc S = getLoc(); 5332 int64_t Value = 0; 5333 5334 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5335 if (Res != MatchOperand_Success) 5336 return Res; 5337 5338 if (ConvertResult && !ConvertResult(Value)) { 5339 Error(S, "invalid " + StringRef(Prefix) + " value."); 5340 } 5341 5342 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5343 return MatchOperand_Success; 5344 } 5345 5346 OperandMatchResultTy 5347 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5348 OperandVector &Operands, 5349 AMDGPUOperand::ImmTy ImmTy, 5350 bool (*ConvertResult)(int64_t&)) { 5351 SMLoc S = getLoc(); 5352 if (!trySkipId(Prefix, AsmToken::Colon)) 5353 return MatchOperand_NoMatch; 5354 5355 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5356 return MatchOperand_ParseFail; 5357 5358 unsigned Val = 0; 5359 const unsigned MaxSize = 4; 5360 5361 // FIXME: How to verify the number of elements matches the number of src 5362 // operands? 
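// Illustrative examples of the syntax accepted by the loop below (the
// operand names come from the optional-operand table later in this file):
//   op_sel:[0,1]   -> Val == 0b10
//   neg_lo:[1,0,1] -> Val == 0b101
// Each element must be 0 or 1 and is packed into bit I of Val.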
5363 for (int I = 0; ; ++I) { 5364 int64_t Op; 5365 SMLoc Loc = getLoc(); 5366 if (!parseExpr(Op)) 5367 return MatchOperand_ParseFail; 5368 5369 if (Op != 0 && Op != 1) { 5370 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5371 return MatchOperand_ParseFail; 5372 } 5373 5374 Val |= (Op << I); 5375 5376 if (trySkipToken(AsmToken::RBrac)) 5377 break; 5378 5379 if (I + 1 == MaxSize) { 5380 Error(getLoc(), "expected a closing square bracket"); 5381 return MatchOperand_ParseFail; 5382 } 5383 5384 if (!skipToken(AsmToken::Comma, "expected a comma")) 5385 return MatchOperand_ParseFail; 5386 } 5387 5388 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5389 return MatchOperand_Success; 5390 } 5391 5392 OperandMatchResultTy 5393 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5394 AMDGPUOperand::ImmTy ImmTy) { 5395 int64_t Bit; 5396 SMLoc S = getLoc(); 5397 5398 if (trySkipId(Name)) { 5399 Bit = 1; 5400 } else if (trySkipId("no", Name)) { 5401 Bit = 0; 5402 } else { 5403 return MatchOperand_NoMatch; 5404 } 5405 5406 if (Name == "r128" && !hasMIMG_R128()) { 5407 Error(S, "r128 modifier is not supported on this GPU"); 5408 return MatchOperand_ParseFail; 5409 } 5410 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5411 Error(S, "a16 modifier is not supported on this GPU"); 5412 return MatchOperand_ParseFail; 5413 } 5414 5415 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5416 ImmTy = AMDGPUOperand::ImmTyR128A16; 5417 5418 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5419 return MatchOperand_Success; 5420 } 5421 5422 OperandMatchResultTy 5423 AMDGPUAsmParser::parseCPol(OperandVector &Operands) { 5424 unsigned CPolOn = 0; 5425 unsigned CPolOff = 0; 5426 SMLoc S = getLoc(); 5427 5428 if (trySkipId("glc")) 5429 CPolOn = AMDGPU::CPol::GLC; 5430 else if (trySkipId("noglc")) 5431 CPolOff = AMDGPU::CPol::GLC; 5432 else if (trySkipId("slc")) 5433 CPolOn = AMDGPU::CPol::SLC; 5434 else if (trySkipId("noslc")) 5435 CPolOff = AMDGPU::CPol::SLC; 5436 else if (trySkipId("dlc")) 5437 CPolOn = AMDGPU::CPol::DLC; 5438 else if (trySkipId("nodlc")) 5439 CPolOff = AMDGPU::CPol::DLC; 5440 else if (trySkipId("scc")) 5441 CPolOn = AMDGPU::CPol::SCC; 5442 else if (trySkipId("noscc")) 5443 CPolOff = AMDGPU::CPol::SCC; 5444 else 5445 return MatchOperand_NoMatch; 5446 5447 if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) { 5448 Error(S, "dlc modifier is not supported on this GPU"); 5449 return MatchOperand_ParseFail; 5450 } 5451 5452 if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) { 5453 Error(S, "scc modifier is not supported on this GPU"); 5454 return MatchOperand_ParseFail; 5455 } 5456 5457 if (CPolSeen & (CPolOn | CPolOff)) { 5458 Error(S, "duplicate cache policy modifier"); 5459 return MatchOperand_ParseFail; 5460 } 5461 5462 CPolSeen |= (CPolOn | CPolOff); 5463 5464 for (unsigned I = 1; I != Operands.size(); ++I) { 5465 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5466 if (Op.isCPol()) { 5467 Op.setImm((Op.getImm() | CPolOn) & ~CPolOff); 5468 return MatchOperand_Success; 5469 } 5470 } 5471 5472 Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S, 5473 AMDGPUOperand::ImmTyCPol)); 5474 5475 return MatchOperand_Success; 5476 } 5477 5478 static void addOptionalImmOperand( 5479 MCInst& Inst, const OperandVector& Operands, 5480 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5481 AMDGPUOperand::ImmTy ImmT, 5482 int64_t Default = 0) { 5483 auto i = OptionalIdx.find(ImmT); 5484 if (i != OptionalIdx.end()) { 
5485 unsigned Idx = i->second; 5486 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5487 } else { 5488 Inst.addOperand(MCOperand::createImm(Default)); 5489 } 5490 } 5491 5492 OperandMatchResultTy 5493 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5494 StringRef &Value, 5495 SMLoc &StringLoc) { 5496 if (!trySkipId(Prefix, AsmToken::Colon)) 5497 return MatchOperand_NoMatch; 5498 5499 StringLoc = getLoc(); 5500 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5501 : MatchOperand_ParseFail; 5502 } 5503 5504 //===----------------------------------------------------------------------===// 5505 // MTBUF format 5506 //===----------------------------------------------------------------------===// 5507 5508 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5509 int64_t MaxVal, 5510 int64_t &Fmt) { 5511 int64_t Val; 5512 SMLoc Loc = getLoc(); 5513 5514 auto Res = parseIntWithPrefix(Pref, Val); 5515 if (Res == MatchOperand_ParseFail) 5516 return false; 5517 if (Res == MatchOperand_NoMatch) 5518 return true; 5519 5520 if (Val < 0 || Val > MaxVal) { 5521 Error(Loc, Twine("out of range ", StringRef(Pref))); 5522 return false; 5523 } 5524 5525 Fmt = Val; 5526 return true; 5527 } 5528 5529 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5530 // values to live in a joint format operand in the MCInst encoding. 5531 OperandMatchResultTy 5532 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5533 using namespace llvm::AMDGPU::MTBUFFormat; 5534 5535 int64_t Dfmt = DFMT_UNDEF; 5536 int64_t Nfmt = NFMT_UNDEF; 5537 5538 // dfmt and nfmt can appear in either order, and each is optional. 5539 for (int I = 0; I < 2; ++I) { 5540 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5541 return MatchOperand_ParseFail; 5542 5543 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5544 return MatchOperand_ParseFail; 5545 } 5546 // Skip optional comma between dfmt/nfmt 5547 // but guard against 2 commas following each other. 5548 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5549 !peekToken().is(AsmToken::Comma)) { 5550 trySkipToken(AsmToken::Comma); 5551 } 5552 } 5553 5554 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5555 return MatchOperand_NoMatch; 5556 5557 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5558 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5559 5560 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5561 return MatchOperand_Success; 5562 } 5563 5564 OperandMatchResultTy 5565 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5566 using namespace llvm::AMDGPU::MTBUFFormat; 5567 5568 int64_t Fmt = UFMT_UNDEF; 5569 5570 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5571 return MatchOperand_ParseFail; 5572 5573 if (Fmt == UFMT_UNDEF) 5574 return MatchOperand_NoMatch; 5575 5576 Format = Fmt; 5577 return MatchOperand_Success; 5578 } 5579 5580 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5581 int64_t &Nfmt, 5582 StringRef FormatStr, 5583 SMLoc Loc) { 5584 using namespace llvm::AMDGPU::MTBUFFormat; 5585 int64_t Format; 5586 5587 Format = getDfmt(FormatStr); 5588 if (Format != DFMT_UNDEF) { 5589 Dfmt = Format; 5590 return true; 5591 } 5592 5593 Format = getNfmt(FormatStr, getSTI()); 5594 if (Format != NFMT_UNDEF) { 5595 Nfmt = Format; 5596 return true; 5597 } 5598 5599 Error(Loc, "unsupported format"); 5600 return false; 5601 } 5602 5603 OperandMatchResultTy 5604 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5605 SMLoc FormatLoc, 5606 int64_t &Format) { 5607 using namespace llvm::AMDGPU::MTBUFFormat; 5608 5609 int64_t Dfmt = DFMT_UNDEF; 5610 int64_t Nfmt = NFMT_UNDEF; 5611 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5612 return MatchOperand_ParseFail; 5613 5614 if (trySkipToken(AsmToken::Comma)) { 5615 StringRef Str; 5616 SMLoc Loc = getLoc(); 5617 if (!parseId(Str, "expected a format string") || 5618 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5619 return MatchOperand_ParseFail; 5620 } 5621 if (Dfmt == DFMT_UNDEF) { 5622 Error(Loc, "duplicate numeric format"); 5623 return MatchOperand_ParseFail; 5624 } else if (Nfmt == NFMT_UNDEF) { 5625 Error(Loc, "duplicate data format"); 5626 return MatchOperand_ParseFail; 5627 } 5628 } 5629 5630 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5631 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5632 5633 if (isGFX10Plus()) { 5634 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5635 if (Ufmt == UFMT_UNDEF) { 5636 Error(FormatLoc, "unsupported format"); 5637 return MatchOperand_ParseFail; 5638 } 5639 Format = Ufmt; 5640 } else { 5641 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5642 } 5643 5644 return MatchOperand_Success; 5645 } 5646 5647 OperandMatchResultTy 5648 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5649 SMLoc Loc, 5650 int64_t &Format) { 5651 using namespace llvm::AMDGPU::MTBUFFormat; 5652 5653 auto Id = getUnifiedFormat(FormatStr); 5654 if (Id == UFMT_UNDEF) 5655 return MatchOperand_NoMatch; 5656 5657 if (!isGFX10Plus()) { 5658 Error(Loc, "unified format is not supported on this GPU"); 5659 return MatchOperand_ParseFail; 5660 } 5661 5662 Format = Id; 5663 return MatchOperand_Success; 5664 } 5665 5666 OperandMatchResultTy 5667 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5668 using namespace llvm::AMDGPU::MTBUFFormat; 5669 SMLoc Loc = getLoc(); 5670 5671 if (!parseExpr(Format)) 5672 return MatchOperand_ParseFail; 5673 if (!isValidFormatEncoding(Format, getSTI())) { 5674 Error(Loc, "out of range format"); 5675 return MatchOperand_ParseFail; 5676 } 5677 5678 return MatchOperand_Success; 5679 } 5680 5681 OperandMatchResultTy 5682 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5683 using namespace llvm::AMDGPU::MTBUFFormat; 5684 5685 if (!trySkipId("format", AsmToken::Colon)) 5686 return MatchOperand_NoMatch; 5687 5688 if (trySkipToken(AsmToken::LBrac)) { 5689 StringRef FormatStr; 5690 SMLoc Loc = getLoc(); 5691 if (!parseId(FormatStr, "expected a format string")) 5692 return MatchOperand_ParseFail; 5693 5694 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5695 if (Res == MatchOperand_NoMatch) 5696 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5697 if (Res != MatchOperand_Success) 5698 return Res; 5699 5700 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5701 return MatchOperand_ParseFail; 5702 5703 return MatchOperand_Success; 5704 } 5705 5706 return parseNumericFormat(Format); 5707 } 5708 5709 OperandMatchResultTy 5710 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5711 using namespace llvm::AMDGPU::MTBUFFormat; 5712 5713 int64_t Format = getDefaultFormatEncoding(getSTI()); 5714 OperandMatchResultTy Res; 5715 SMLoc Loc = getLoc(); 5716 5717 // Parse legacy format syntax. 5718 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5719 if (Res == MatchOperand_ParseFail) 5720 return Res; 5721 5722 bool FormatFound = (Res == MatchOperand_Success); 5723 5724 Operands.push_back( 5725 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5726 5727 if (FormatFound) 5728 trySkipToken(AsmToken::Comma); 5729 5730 if (isToken(AsmToken::EndOfStatement)) { 5731 // We are expecting an soffset operand, 5732 // but let matcher handle the error. 5733 return MatchOperand_Success; 5734 } 5735 5736 // Parse soffset. 
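// A format modifier may also follow soffset instead of preceding it.
// Illustrative examples (the exact operand values are assumptions made
// for documentation only):
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_DATA_FORMAT_32]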
5737 Res = parseRegOrImm(Operands); 5738 if (Res != MatchOperand_Success) 5739 return Res; 5740 5741 trySkipToken(AsmToken::Comma); 5742 5743 if (!FormatFound) { 5744 Res = parseSymbolicOrNumericFormat(Format); 5745 if (Res == MatchOperand_ParseFail) 5746 return Res; 5747 if (Res == MatchOperand_Success) { 5748 auto Size = Operands.size(); 5749 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5750 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5751 Op.setImm(Format); 5752 } 5753 return MatchOperand_Success; 5754 } 5755 5756 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5757 Error(getLoc(), "duplicate format"); 5758 return MatchOperand_ParseFail; 5759 } 5760 return MatchOperand_Success; 5761 } 5762 5763 //===----------------------------------------------------------------------===// 5764 // ds 5765 //===----------------------------------------------------------------------===// 5766 5767 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5768 const OperandVector &Operands) { 5769 OptionalImmIndexMap OptionalIdx; 5770 5771 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5772 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5773 5774 // Add the register arguments 5775 if (Op.isReg()) { 5776 Op.addRegOperands(Inst, 1); 5777 continue; 5778 } 5779 5780 // Handle optional arguments 5781 OptionalIdx[Op.getImmTy()] = i; 5782 } 5783 5784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5785 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5787 5788 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5789 } 5790 5791 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5792 bool IsGdsHardcoded) { 5793 OptionalImmIndexMap OptionalIdx; 5794 5795 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5796 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5797 5798 // Add the register arguments 5799 if (Op.isReg()) { 5800 Op.addRegOperands(Inst, 1); 5801 continue; 5802 } 5803 5804 if (Op.isToken() && Op.getToken() == "gds") { 5805 IsGdsHardcoded = true; 5806 continue; 5807 } 5808 5809 // Handle optional arguments 5810 OptionalIdx[Op.getImmTy()] = i; 5811 } 5812 5813 AMDGPUOperand::ImmTy OffsetType = 5814 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5815 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5816 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5817 AMDGPUOperand::ImmTyOffset; 5818 5819 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5820 5821 if (!IsGdsHardcoded) { 5822 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5823 } 5824 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5825 } 5826 5827 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5828 OptionalImmIndexMap OptionalIdx; 5829 5830 unsigned OperandIdx[4]; 5831 unsigned EnMask = 0; 5832 int SrcIdx = 0; 5833 5834 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5835 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5836 5837 // Add the register arguments 5838 if (Op.isReg()) { 5839 assert(SrcIdx < 4); 5840 OperandIdx[SrcIdx] = Inst.size(); 5841 Op.addRegOperands(Inst, 1); 5842 ++SrcIdx; 5843 continue; 5844 } 5845 5846 if (Op.isOff()) { 5847 assert(SrcIdx < 4); 5848 OperandIdx[SrcIdx] = Inst.size(); 5849 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5850 ++SrcIdx; 5851 continue; 5852 } 5853 5854 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5855 Op.addImmOperands(Inst, 1); 5856 continue; 5857 } 5858 5859 if (Op.isToken() && Op.getToken() == "done") 5860 continue; 5861 5862 // Handle optional arguments 5863 OptionalIdx[Op.getImmTy()] = i; 5864 } 5865 5866 assert(SrcIdx == 4); 5867 5868 bool Compr = false; 5869 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5870 Compr = true; 5871 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5872 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5873 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5874 } 5875 5876 for (auto i = 0; i < SrcIdx; ++i) { 5877 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5878 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5879 } 5880 } 5881 5882 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5884 5885 Inst.addOperand(MCOperand::createImm(EnMask)); 5886 } 5887 5888 //===----------------------------------------------------------------------===// 5889 // s_waitcnt 5890 //===----------------------------------------------------------------------===// 5891 5892 static bool 5893 encodeCnt( 5894 const AMDGPU::IsaVersion ISA, 5895 int64_t &IntVal, 5896 int64_t CntVal, 5897 bool Saturate, 5898 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5899 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5900 { 5901 bool Failed = false; 5902 5903 IntVal = encode(ISA, IntVal, CntVal); 5904 if (CntVal != decode(ISA, IntVal)) { 5905 if (Saturate) { 5906 IntVal = encode(ISA, IntVal, -1); 5907 } else { 5908 Failed = true; 5909 } 5910 } 5911 return Failed; 5912 } 5913 5914 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5915 5916 SMLoc CntLoc = getLoc(); 5917 StringRef CntName = getTokenStr(); 5918 5919 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5920 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5921 return false; 5922 5923 int64_t CntVal; 5924 SMLoc ValLoc = getLoc(); 5925 if (!parseExpr(CntVal)) 5926 return false; 5927 5928 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5929 5930 bool Failed = true; 5931 bool Sat = CntName.endswith("_sat"); 5932 5933 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5934 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5935 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5936 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5937 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5938 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5939 } else { 5940 Error(CntLoc, "invalid counter name " + CntName); 5941 return false; 5942 } 5943 5944 if (Failed) { 5945 Error(ValLoc, "too large value for " + CntName); 5946 return false; 5947 } 5948 5949 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5950 return false; 5951 5952 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5953 if (isToken(AsmToken::EndOfStatement)) { 5954 Error(getLoc(), "expected a counter name"); 5955 return false; 5956 } 5957 } 5958 5959 return true; 5960 } 5961 5962 OperandMatchResultTy 5963 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5964 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5965 int64_t Waitcnt = getWaitcntBitMask(ISA); 5966 SMLoc S = getLoc(); 5967 5968 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5969 while (!isToken(AsmToken::EndOfStatement)) { 5970 if (!parseCnt(Waitcnt)) 5971 return MatchOperand_ParseFail; 5972 } 5973 } else { 5974 if (!parseExpr(Waitcnt)) 5975 return MatchOperand_ParseFail; 5976 } 5977 5978 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5979 return MatchOperand_Success; 5980 } 5981 5982 bool 5983 AMDGPUOperand::isSWaitCnt() const { 5984 return isImm(); 5985 } 5986 5987 //===----------------------------------------------------------------------===// 5988 // hwreg 5989 //===----------------------------------------------------------------------===// 5990 5991 bool 5992 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5993 OperandInfoTy &Offset, 5994 
OperandInfoTy &Width) { 5995 using namespace llvm::AMDGPU::Hwreg; 5996 5997 // The register may be specified by name or using a numeric code 5998 HwReg.Loc = getLoc(); 5999 if (isToken(AsmToken::Identifier) && 6000 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 6001 HwReg.IsSymbolic = true; 6002 lex(); // skip register name 6003 } else if (!parseExpr(HwReg.Id, "a register name")) { 6004 return false; 6005 } 6006 6007 if (trySkipToken(AsmToken::RParen)) 6008 return true; 6009 6010 // parse optional params 6011 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 6012 return false; 6013 6014 Offset.Loc = getLoc(); 6015 if (!parseExpr(Offset.Id)) 6016 return false; 6017 6018 if (!skipToken(AsmToken::Comma, "expected a comma")) 6019 return false; 6020 6021 Width.Loc = getLoc(); 6022 return parseExpr(Width.Id) && 6023 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6024 } 6025 6026 bool 6027 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 6028 const OperandInfoTy &Offset, 6029 const OperandInfoTy &Width) { 6030 6031 using namespace llvm::AMDGPU::Hwreg; 6032 6033 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 6034 Error(HwReg.Loc, 6035 "specified hardware register is not supported on this GPU"); 6036 return false; 6037 } 6038 if (!isValidHwreg(HwReg.Id)) { 6039 Error(HwReg.Loc, 6040 "invalid code of hardware register: only 6-bit values are legal"); 6041 return false; 6042 } 6043 if (!isValidHwregOffset(Offset.Id)) { 6044 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 6045 return false; 6046 } 6047 if (!isValidHwregWidth(Width.Id)) { 6048 Error(Width.Loc, 6049 "invalid bitfield width: only values from 1 to 32 are legal"); 6050 return false; 6051 } 6052 return true; 6053 } 6054 6055 OperandMatchResultTy 6056 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6057 using namespace llvm::AMDGPU::Hwreg; 6058 6059 int64_t ImmVal = 0; 6060 SMLoc Loc = getLoc(); 6061 6062 if (trySkipId("hwreg", AsmToken::LParen)) { 6063 OperandInfoTy HwReg(ID_UNKNOWN_); 6064 OperandInfoTy Offset(OFFSET_DEFAULT_); 6065 OperandInfoTy Width(WIDTH_DEFAULT_); 6066 if (parseHwregBody(HwReg, Offset, Width) && 6067 validateHwreg(HwReg, Offset, Width)) { 6068 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6069 } else { 6070 return MatchOperand_ParseFail; 6071 } 6072 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6073 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6074 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6075 return MatchOperand_ParseFail; 6076 } 6077 } else { 6078 return MatchOperand_ParseFail; 6079 } 6080 6081 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6082 return MatchOperand_Success; 6083 } 6084 6085 bool AMDGPUOperand::isHwreg() const { 6086 return isImmTy(ImmTyHwreg); 6087 } 6088 6089 //===----------------------------------------------------------------------===// 6090 // sendmsg 6091 //===----------------------------------------------------------------------===// 6092 6093 bool 6094 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6095 OperandInfoTy &Op, 6096 OperandInfoTy &Stream) { 6097 using namespace llvm::AMDGPU::SendMsg; 6098 6099 Msg.Loc = getLoc(); 6100 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6101 Msg.IsSymbolic = true; 6102 lex(); // skip message name 6103 } else if (!parseExpr(Msg.Id, "a message name")) { 6104 return false; 6105 } 6106 6107 if (trySkipToken(AsmToken::Comma)) { 6108 Op.IsDefined = true; 
6109 Op.Loc = getLoc(); 6110 if (isToken(AsmToken::Identifier) && 6111 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 6112 lex(); // skip operation name 6113 } else if (!parseExpr(Op.Id, "an operation name")) { 6114 return false; 6115 } 6116 6117 if (trySkipToken(AsmToken::Comma)) { 6118 Stream.IsDefined = true; 6119 Stream.Loc = getLoc(); 6120 if (!parseExpr(Stream.Id)) 6121 return false; 6122 } 6123 } 6124 6125 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6126 } 6127 6128 bool 6129 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 6130 const OperandInfoTy &Op, 6131 const OperandInfoTy &Stream) { 6132 using namespace llvm::AMDGPU::SendMsg; 6133 6134 // Validation strictness depends on whether message is specified 6135 // in a symbolc or in a numeric form. In the latter case 6136 // only encoding possibility is checked. 6137 bool Strict = Msg.IsSymbolic; 6138 6139 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 6140 Error(Msg.Loc, "invalid message id"); 6141 return false; 6142 } 6143 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 6144 if (Op.IsDefined) { 6145 Error(Op.Loc, "message does not support operations"); 6146 } else { 6147 Error(Msg.Loc, "missing message operation"); 6148 } 6149 return false; 6150 } 6151 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) { 6152 Error(Op.Loc, "invalid operation id"); 6153 return false; 6154 } 6155 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 6156 Error(Stream.Loc, "message operation does not support streams"); 6157 return false; 6158 } 6159 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) { 6160 Error(Stream.Loc, "invalid message stream id"); 6161 return false; 6162 } 6163 return true; 6164 } 6165 6166 OperandMatchResultTy 6167 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 6168 using namespace llvm::AMDGPU::SendMsg; 6169 6170 int64_t ImmVal = 0; 6171 SMLoc Loc = getLoc(); 6172 6173 if (trySkipId("sendmsg", AsmToken::LParen)) { 6174 OperandInfoTy Msg(ID_UNKNOWN_); 6175 OperandInfoTy Op(OP_NONE_); 6176 OperandInfoTy Stream(STREAM_ID_NONE_); 6177 if (parseSendMsgBody(Msg, Op, Stream) && 6178 validateSendMsg(Msg, Op, Stream)) { 6179 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 6180 } else { 6181 return MatchOperand_ParseFail; 6182 } 6183 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 6184 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6185 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6186 return MatchOperand_ParseFail; 6187 } 6188 } else { 6189 return MatchOperand_ParseFail; 6190 } 6191 6192 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6193 return MatchOperand_Success; 6194 } 6195 6196 bool AMDGPUOperand::isSendMsg() const { 6197 return isImmTy(ImmTySendMsg); 6198 } 6199 6200 //===----------------------------------------------------------------------===// 6201 // v_interp 6202 //===----------------------------------------------------------------------===// 6203 6204 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6205 StringRef Str; 6206 SMLoc S = getLoc(); 6207 6208 if (!parseId(Str)) 6209 return MatchOperand_NoMatch; 6210 6211 int Slot = StringSwitch<int>(Str) 6212 .Case("p10", 0) 6213 .Case("p20", 1) 6214 .Case("p0", 2) 6215 .Default(-1); 6216 6217 if (Slot == -1) { 6218 Error(S, "invalid interpolation slot"); 6219 return MatchOperand_ParseFail; 6220 } 6221 6222 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6223 
AMDGPUOperand::ImmTyInterpSlot)); 6224 return MatchOperand_Success; 6225 } 6226 6227 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6228 StringRef Str; 6229 SMLoc S = getLoc(); 6230 6231 if (!parseId(Str)) 6232 return MatchOperand_NoMatch; 6233 6234 if (!Str.startswith("attr")) { 6235 Error(S, "invalid interpolation attribute"); 6236 return MatchOperand_ParseFail; 6237 } 6238 6239 StringRef Chan = Str.take_back(2); 6240 int AttrChan = StringSwitch<int>(Chan) 6241 .Case(".x", 0) 6242 .Case(".y", 1) 6243 .Case(".z", 2) 6244 .Case(".w", 3) 6245 .Default(-1); 6246 if (AttrChan == -1) { 6247 Error(S, "invalid or missing interpolation attribute channel"); 6248 return MatchOperand_ParseFail; 6249 } 6250 6251 Str = Str.drop_back(2).drop_front(4); 6252 6253 uint8_t Attr; 6254 if (Str.getAsInteger(10, Attr)) { 6255 Error(S, "invalid or missing interpolation attribute number"); 6256 return MatchOperand_ParseFail; 6257 } 6258 6259 if (Attr > 63) { 6260 Error(S, "out of bounds interpolation attribute number"); 6261 return MatchOperand_ParseFail; 6262 } 6263 6264 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6265 6266 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6267 AMDGPUOperand::ImmTyInterpAttr)); 6268 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6269 AMDGPUOperand::ImmTyAttrChan)); 6270 return MatchOperand_Success; 6271 } 6272 6273 //===----------------------------------------------------------------------===// 6274 // exp 6275 //===----------------------------------------------------------------------===// 6276 6277 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6278 using namespace llvm::AMDGPU::Exp; 6279 6280 StringRef Str; 6281 SMLoc S = getLoc(); 6282 6283 if (!parseId(Str)) 6284 return MatchOperand_NoMatch; 6285 6286 unsigned Id = getTgtId(Str); 6287 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6288 Error(S, (Id == ET_INVALID) ? 
6289 "invalid exp target" : 6290 "exp target is not supported on this GPU"); 6291 return MatchOperand_ParseFail; 6292 } 6293 6294 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6295 AMDGPUOperand::ImmTyExpTgt)); 6296 return MatchOperand_Success; 6297 } 6298 6299 //===----------------------------------------------------------------------===// 6300 // parser helpers 6301 //===----------------------------------------------------------------------===// 6302 6303 bool 6304 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6305 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6306 } 6307 6308 bool 6309 AMDGPUAsmParser::isId(const StringRef Id) const { 6310 return isId(getToken(), Id); 6311 } 6312 6313 bool 6314 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6315 return getTokenKind() == Kind; 6316 } 6317 6318 bool 6319 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6320 if (isId(Id)) { 6321 lex(); 6322 return true; 6323 } 6324 return false; 6325 } 6326 6327 bool 6328 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6329 if (isToken(AsmToken::Identifier)) { 6330 StringRef Tok = getTokenStr(); 6331 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6332 lex(); 6333 return true; 6334 } 6335 } 6336 return false; 6337 } 6338 6339 bool 6340 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6341 if (isId(Id) && peekToken().is(Kind)) { 6342 lex(); 6343 lex(); 6344 return true; 6345 } 6346 return false; 6347 } 6348 6349 bool 6350 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6351 if (isToken(Kind)) { 6352 lex(); 6353 return true; 6354 } 6355 return false; 6356 } 6357 6358 bool 6359 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6360 const StringRef ErrMsg) { 6361 if (!trySkipToken(Kind)) { 6362 Error(getLoc(), ErrMsg); 6363 return false; 6364 } 6365 return true; 6366 } 6367 6368 bool 6369 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6370 SMLoc S = getLoc(); 6371 6372 const MCExpr *Expr; 6373 if (Parser.parseExpression(Expr)) 6374 return false; 6375 6376 if (Expr->evaluateAsAbsolute(Imm)) 6377 return true; 6378 6379 if (Expected.empty()) { 6380 Error(S, "expected absolute expression"); 6381 } else { 6382 Error(S, Twine("expected ", Expected) + 6383 Twine(" or an absolute expression")); 6384 } 6385 return false; 6386 } 6387 6388 bool 6389 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6390 SMLoc S = getLoc(); 6391 6392 const MCExpr *Expr; 6393 if (Parser.parseExpression(Expr)) 6394 return false; 6395 6396 int64_t IntVal; 6397 if (Expr->evaluateAsAbsolute(IntVal)) { 6398 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6399 } else { 6400 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6401 } 6402 return true; 6403 } 6404 6405 bool 6406 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6407 if (isToken(AsmToken::String)) { 6408 Val = getToken().getStringContents(); 6409 lex(); 6410 return true; 6411 } else { 6412 Error(getLoc(), ErrMsg); 6413 return false; 6414 } 6415 } 6416 6417 bool 6418 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6419 if (isToken(AsmToken::Identifier)) { 6420 Val = getTokenStr(); 6421 lex(); 6422 return true; 6423 } else { 6424 if (!ErrMsg.empty()) 6425 Error(getLoc(), ErrMsg); 6426 return false; 6427 } 6428 } 6429 6430 AsmToken 6431 AMDGPUAsmParser::getToken() const { 6432 return Parser.getTok(); 6433 } 6434 6435 AsmToken 6436 
AMDGPUAsmParser::peekToken() { 6437 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6438 } 6439 6440 void 6441 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6442 auto TokCount = getLexer().peekTokens(Tokens); 6443 6444 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6445 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6446 } 6447 6448 AsmToken::TokenKind 6449 AMDGPUAsmParser::getTokenKind() const { 6450 return getLexer().getKind(); 6451 } 6452 6453 SMLoc 6454 AMDGPUAsmParser::getLoc() const { 6455 return getToken().getLoc(); 6456 } 6457 6458 StringRef 6459 AMDGPUAsmParser::getTokenStr() const { 6460 return getToken().getString(); 6461 } 6462 6463 void 6464 AMDGPUAsmParser::lex() { 6465 Parser.Lex(); 6466 } 6467 6468 SMLoc 6469 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6470 const OperandVector &Operands) const { 6471 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6472 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6473 if (Test(Op)) 6474 return Op.getStartLoc(); 6475 } 6476 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6477 } 6478 6479 SMLoc 6480 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6481 const OperandVector &Operands) const { 6482 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6483 return getOperandLoc(Test, Operands); 6484 } 6485 6486 SMLoc 6487 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6488 const OperandVector &Operands) const { 6489 auto Test = [=](const AMDGPUOperand& Op) { 6490 return Op.isRegKind() && Op.getReg() == Reg; 6491 }; 6492 return getOperandLoc(Test, Operands); 6493 } 6494 6495 SMLoc 6496 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6497 auto Test = [](const AMDGPUOperand& Op) { 6498 return Op.IsImmKindLiteral() || Op.isExpr(); 6499 }; 6500 return getOperandLoc(Test, Operands); 6501 } 6502 6503 SMLoc 6504 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6505 auto Test = [](const AMDGPUOperand& Op) { 6506 return Op.isImmKindConst(); 6507 }; 6508 return getOperandLoc(Test, Operands); 6509 } 6510 6511 //===----------------------------------------------------------------------===// 6512 // swizzle 6513 //===----------------------------------------------------------------------===// 6514 6515 LLVM_READNONE 6516 static unsigned 6517 encodeBitmaskPerm(const unsigned AndMask, 6518 const unsigned OrMask, 6519 const unsigned XorMask) { 6520 using namespace llvm::AMDGPU::Swizzle; 6521 6522 return BITMASK_PERM_ENC | 6523 (AndMask << BITMASK_AND_SHIFT) | 6524 (OrMask << BITMASK_OR_SHIFT) | 6525 (XorMask << BITMASK_XOR_SHIFT); 6526 } 6527 6528 bool 6529 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6530 const unsigned MinVal, 6531 const unsigned MaxVal, 6532 const StringRef ErrMsg, 6533 SMLoc &Loc) { 6534 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6535 return false; 6536 } 6537 Loc = getLoc(); 6538 if (!parseExpr(Op)) { 6539 return false; 6540 } 6541 if (Op < MinVal || Op > MaxVal) { 6542 Error(Loc, ErrMsg); 6543 return false; 6544 } 6545 6546 return true; 6547 } 6548 6549 bool 6550 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6551 const unsigned MinVal, 6552 const unsigned MaxVal, 6553 const StringRef ErrMsg) { 6554 SMLoc Loc; 6555 for (unsigned i = 0; i < OpNum; ++i) { 6556 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6557 return false; 6558 } 6559 6560 return true; 6561 } 6562 6563 bool 6564 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6565 using namespace llvm::AMDGPU::Swizzle; 6566 6567 int64_t Lane[LANE_NUM]; 6568 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6569 "expected a 2-bit lane id")) { 6570 Imm = QUAD_PERM_ENC; 6571 for (unsigned I = 0; I < LANE_NUM; ++I) { 6572 Imm |= Lane[I] << (LANE_SHIFT * I); 6573 } 6574 return true; 6575 } 6576 return false; 6577 } 6578 6579 bool 6580 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6581 using namespace llvm::AMDGPU::Swizzle; 6582 6583 SMLoc Loc; 6584 int64_t GroupSize; 6585 int64_t LaneIdx; 6586 6587 if (!parseSwizzleOperand(GroupSize, 6588 2, 32, 6589 "group size must be in the interval [2,32]", 6590 Loc)) { 6591 return false; 6592 } 6593 if (!isPowerOf2_64(GroupSize)) { 6594 Error(Loc, "group size must be a power of two"); 6595 return false; 6596 } 6597 if (parseSwizzleOperand(LaneIdx, 6598 0, GroupSize - 1, 6599 "lane id must be in the interval [0,group size - 1]", 6600 Loc)) { 6601 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6602 return true; 6603 } 6604 return false; 6605 } 6606 6607 bool 6608 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6609 using namespace llvm::AMDGPU::Swizzle; 6610 6611 SMLoc Loc; 6612 int64_t GroupSize; 6613 6614 if (!parseSwizzleOperand(GroupSize, 6615 2, 32, 6616 "group size must be in the interval [2,32]", 6617 Loc)) { 6618 return false; 6619 } 6620 if (!isPowerOf2_64(GroupSize)) { 6621 Error(Loc, "group size must be a power of two"); 6622 return false; 6623 } 6624 6625 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6626 return true; 6627 } 6628 6629 bool 6630 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6631 using namespace llvm::AMDGPU::Swizzle; 6632 6633 SMLoc Loc; 6634 int64_t GroupSize; 6635 6636 if (!parseSwizzleOperand(GroupSize, 6637 1, 16, 6638 "group size must be in the interval [1,16]", 6639 Loc)) { 6640 return false; 6641 } 6642 if (!isPowerOf2_64(GroupSize)) { 6643 Error(Loc, "group size must be a power of two"); 6644 return false; 6645 } 6646 6647 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6648 return true; 6649 } 6650 6651 bool 6652 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6653 using namespace llvm::AMDGPU::Swizzle; 6654 6655 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6656 return false; 6657 } 6658 6659 StringRef Ctl; 6660 SMLoc StrLoc = getLoc(); 6661 if (!parseString(Ctl)) { 6662 return false; 6663 } 6664 if (Ctl.size() != BITMASK_WIDTH) { 6665 Error(StrLoc, "expected a 5-character mask"); 6666 return false; 6667 } 6668 6669 unsigned AndMask = 0; 6670 unsigned OrMask = 0; 6671 unsigned XorMask = 0; 6672 6673 for (size_t i = 0; i < Ctl.size(); ++i) { 6674 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6675 switch(Ctl[i]) { 6676 default: 6677 Error(StrLoc, "invalid mask"); 6678 return false; 6679 case '0': 6680 break; 6681 case '1': 6682 OrMask |= Mask; 6683 break; 6684 case 'p': 6685 AndMask |= Mask; 6686 break; 6687 case 'i': 6688 AndMask |= Mask; 6689 XorMask |= Mask; 6690 break; 6691 } 6692 } 6693 6694 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6695 return true; 6696 } 6697 6698 bool 6699 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6700 6701 SMLoc OffsetLoc = getLoc(); 6702 6703 if (!parseExpr(Imm, "a swizzle macro")) { 6704 return false; 6705 } 6706 if (!isUInt<16>(Imm)) { 6707 Error(OffsetLoc, "expected a 16-bit offset"); 6708 return false; 6709 } 6710 return true; 6711 } 6712 6713 bool 6714 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6715 using namespace llvm::AMDGPU::Swizzle; 6716 6717 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6718 6719 SMLoc ModeLoc = getLoc(); 6720 bool Ok = false; 6721 6722 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6723 Ok = parseSwizzleQuadPerm(Imm); 6724 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6725 Ok = parseSwizzleBitmaskPerm(Imm); 6726 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6727 Ok = parseSwizzleBroadcast(Imm); 6728 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6729 Ok = parseSwizzleSwap(Imm); 6730 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6731 Ok = parseSwizzleReverse(Imm); 6732 } else { 6733 Error(ModeLoc, "expected a swizzle mode"); 6734 } 6735 6736 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6737 } 6738 6739 return false; 6740 } 6741 6742 OperandMatchResultTy 6743 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6744 SMLoc S = getLoc(); 6745 int64_t Imm = 0; 6746 6747 if (trySkipId("offset")) { 6748 6749 bool Ok = false; 6750 if (skipToken(AsmToken::Colon, "expected a colon")) { 6751 if (trySkipId("swizzle")) { 6752 Ok = parseSwizzleMacro(Imm); 6753 } else { 6754 Ok = parseSwizzleOffset(Imm); 6755 } 6756 } 6757 6758 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6759 6760 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6761 } else { 6762 // Swizzle "offset" operand is optional. 6763 // If it is omitted, try parsing other optional operands. 6764 return parseOptionalOpr(Operands); 6765 } 6766 } 6767 6768 bool 6769 AMDGPUOperand::isSwizzle() const { 6770 return isImmTy(ImmTySwizzle); 6771 } 6772 6773 //===----------------------------------------------------------------------===// 6774 // VGPR Index Mode 6775 //===----------------------------------------------------------------------===// 6776 6777 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6778 6779 using namespace llvm::AMDGPU::VGPRIndexMode; 6780 6781 if (trySkipToken(AsmToken::RParen)) { 6782 return OFF; 6783 } 6784 6785 int64_t Imm = 0; 6786 6787 while (true) { 6788 unsigned Mode = 0; 6789 SMLoc S = getLoc(); 6790 6791 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6792 if (trySkipId(IdSymbolic[ModeId])) { 6793 Mode = 1 << ModeId; 6794 break; 6795 } 6796 } 6797 6798 if (Mode == 0) { 6799 Error(S, (Imm == 0)? 
6800 "expected a VGPR index mode or a closing parenthesis" : 6801 "expected a VGPR index mode"); 6802 return UNDEF; 6803 } 6804 6805 if (Imm & Mode) { 6806 Error(S, "duplicate VGPR index mode"); 6807 return UNDEF; 6808 } 6809 Imm |= Mode; 6810 6811 if (trySkipToken(AsmToken::RParen)) 6812 break; 6813 if (!skipToken(AsmToken::Comma, 6814 "expected a comma or a closing parenthesis")) 6815 return UNDEF; 6816 } 6817 6818 return Imm; 6819 } 6820 6821 OperandMatchResultTy 6822 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6823 6824 using namespace llvm::AMDGPU::VGPRIndexMode; 6825 6826 int64_t Imm = 0; 6827 SMLoc S = getLoc(); 6828 6829 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6830 Imm = parseGPRIdxMacro(); 6831 if (Imm == UNDEF) 6832 return MatchOperand_ParseFail; 6833 } else { 6834 if (getParser().parseAbsoluteExpression(Imm)) 6835 return MatchOperand_ParseFail; 6836 if (Imm < 0 || !isUInt<4>(Imm)) { 6837 Error(S, "invalid immediate: only 4-bit values are legal"); 6838 return MatchOperand_ParseFail; 6839 } 6840 } 6841 6842 Operands.push_back( 6843 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6844 return MatchOperand_Success; 6845 } 6846 6847 bool AMDGPUOperand::isGPRIdxMode() const { 6848 return isImmTy(ImmTyGprIdxMode); 6849 } 6850 6851 //===----------------------------------------------------------------------===// 6852 // sopp branch targets 6853 //===----------------------------------------------------------------------===// 6854 6855 OperandMatchResultTy 6856 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6857 6858 // Make sure we are not parsing something 6859 // that looks like a label or an expression but is not. 6860 // This will improve error messages. 6861 if (isRegister() || isModifier()) 6862 return MatchOperand_NoMatch; 6863 6864 if (!parseExpr(Operands)) 6865 return MatchOperand_ParseFail; 6866 6867 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6868 assert(Opr.isImm() || Opr.isExpr()); 6869 SMLoc Loc = Opr.getStartLoc(); 6870 6871 // Currently we do not support arbitrary expressions as branch targets. 6872 // Only labels and absolute expressions are accepted. 
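// For example (illustrative only): "s_branch skip_block" and "s_branch 4"
// are accepted, while a composite expression such as "s_branch label+4"
// is rejected by the checks below.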
6873 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6874 Error(Loc, "expected an absolute expression or a label"); 6875 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6876 Error(Loc, "expected a 16-bit signed jump offset"); 6877 } 6878 6879 return MatchOperand_Success; 6880 } 6881 6882 //===----------------------------------------------------------------------===// 6883 // Boolean holding registers 6884 //===----------------------------------------------------------------------===// 6885 6886 OperandMatchResultTy 6887 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6888 return parseReg(Operands); 6889 } 6890 6891 //===----------------------------------------------------------------------===// 6892 // mubuf 6893 //===----------------------------------------------------------------------===// 6894 6895 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const { 6896 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol); 6897 } 6898 6899 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6900 const OperandVector &Operands, 6901 bool IsAtomic, 6902 bool IsLds) { 6903 bool IsLdsOpcode = IsLds; 6904 bool HasLdsModifier = false; 6905 OptionalImmIndexMap OptionalIdx; 6906 unsigned FirstOperandIdx = 1; 6907 bool IsAtomicReturn = false; 6908 6909 if (IsAtomic) { 6910 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6911 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6912 if (!Op.isCPol()) 6913 continue; 6914 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 6915 break; 6916 } 6917 6918 if (!IsAtomicReturn) { 6919 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 6920 if (NewOpc != -1) 6921 Inst.setOpcode(NewOpc); 6922 } 6923 6924 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 6925 SIInstrFlags::IsAtomicRet; 6926 } 6927 6928 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6929 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6930 6931 // Add the register arguments 6932 if (Op.isReg()) { 6933 Op.addRegOperands(Inst, 1); 6934 // Insert a tied src for atomic return dst. 6935 // This cannot be postponed as subsequent calls to 6936 // addImmOperands rely on correct number of MC operands. 6937 if (IsAtomicReturn && i == FirstOperandIdx) 6938 Op.addRegOperands(Inst, 1); 6939 continue; 6940 } 6941 6942 // Handle the case where soffset is an immediate 6943 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6944 Op.addImmOperands(Inst, 1); 6945 continue; 6946 } 6947 6948 HasLdsModifier |= Op.isLDS(); 6949 6950 // Handle tokens like 'offen' which are sometimes hard-coded into the 6951 // asm string. There are no MCInst operands for these. 6952 if (Op.isToken()) { 6953 continue; 6954 } 6955 assert(Op.isImm()); 6956 6957 // Handle optional arguments 6958 OptionalIdx[Op.getImmTy()] = i; 6959 } 6960 6961 // This is a workaround for an llvm quirk which may result in an 6962 // incorrect instruction selection. Lds and non-lds versions of 6963 // MUBUF instructions are identical except that lds versions 6964 // have mandatory 'lds' modifier. However this modifier follows 6965 // optional modifiers and llvm asm matcher regards this 'lds' 6966 // modifier as an optional one. As a result, an lds version 6967 // of opcode may be selected even if it has no 'lds' modifier. 6968 if (IsLdsOpcode && !HasLdsModifier) { 6969 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6970 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6971 Inst.setOpcode(NoLdsOpcode); 6972 IsLdsOpcode = false; 6973 } 6974 } 6975 6976 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6977 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 6978 6979 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6980 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6981 } 6982 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6983 } 6984 6985 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6986 OptionalImmIndexMap OptionalIdx; 6987 6988 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6989 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6990 6991 // Add the register arguments 6992 if (Op.isReg()) { 6993 Op.addRegOperands(Inst, 1); 6994 continue; 6995 } 6996 6997 // Handle the case where soffset is an immediate 6998 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6999 Op.addImmOperands(Inst, 1); 7000 continue; 7001 } 7002 7003 // Handle tokens like 'offen' which are sometimes hard-coded into the 7004 // asm string. There are no MCInst operands for these. 7005 if (Op.isToken()) { 7006 continue; 7007 } 7008 assert(Op.isImm()); 7009 7010 // Handle optional arguments 7011 OptionalIdx[Op.getImmTy()] = i; 7012 } 7013 7014 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7015 AMDGPUOperand::ImmTyOffset); 7016 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 7017 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7018 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7019 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 7020 } 7021 7022 //===----------------------------------------------------------------------===// 7023 // mimg 7024 //===----------------------------------------------------------------------===// 7025 7026 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 7027 bool IsAtomic) { 7028 unsigned I = 1; 7029 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7030 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7031 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7032 } 7033 7034 if (IsAtomic) { 7035 // Add src, same as dst 7036 assert(Desc.getNumDefs() == 1); 7037 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 7038 } 7039 7040 OptionalImmIndexMap OptionalIdx; 7041 7042 for (unsigned E = Operands.size(); I != E; ++I) { 7043 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7044 7045 // Add the register arguments 7046 if (Op.isReg()) { 7047 Op.addRegOperands(Inst, 1); 7048 } else if (Op.isImmModifier()) { 7049 OptionalIdx[Op.getImmTy()] = I; 7050 } else if (!Op.isToken()) { 7051 llvm_unreachable("unexpected operand type"); 7052 } 7053 } 7054 7055 bool IsGFX10Plus = isGFX10Plus(); 7056 7057 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7058 if (IsGFX10Plus) 7059 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7060 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7061 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol); 7062 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7063 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7064 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7065 if (IsGFX10Plus) 
7066 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7067 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7068 if (!IsGFX10Plus) 7069 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7070 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7071 } 7072 7073 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7074 cvtMIMG(Inst, Operands, true); 7075 } 7076 7077 void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) { 7078 OptionalImmIndexMap OptionalIdx; 7079 bool IsAtomicReturn = false; 7080 7081 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7082 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7083 if (!Op.isCPol()) 7084 continue; 7085 IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC; 7086 break; 7087 } 7088 7089 if (!IsAtomicReturn) { 7090 int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode()); 7091 if (NewOpc != -1) 7092 Inst.setOpcode(NewOpc); 7093 } 7094 7095 IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags & 7096 SIInstrFlags::IsAtomicRet; 7097 7098 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 7099 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 7100 7101 // Add the register arguments 7102 if (Op.isReg()) { 7103 Op.addRegOperands(Inst, 1); 7104 if (IsAtomicReturn && i == 1) 7105 Op.addRegOperands(Inst, 1); 7106 continue; 7107 } 7108 7109 // Handle the case where soffset is an immediate 7110 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 7111 Op.addImmOperands(Inst, 1); 7112 continue; 7113 } 7114 7115 // Handle tokens like 'offen' which are sometimes hard-coded into the 7116 // asm string. There are no MCInst operands for these. 7117 if (Op.isToken()) { 7118 continue; 7119 } 7120 assert(Op.isImm()); 7121 7122 // Handle optional arguments 7123 OptionalIdx[Op.getImmTy()] = i; 7124 } 7125 7126 if ((int)Inst.getNumOperands() <= 7127 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset)) 7128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 7129 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0); 7130 } 7131 7132 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7133 const OperandVector &Operands) { 7134 for (unsigned I = 1; I < Operands.size(); ++I) { 7135 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7136 if (Operand.isReg()) 7137 Operand.addRegOperands(Inst, 1); 7138 } 7139 7140 Inst.addOperand(MCOperand::createImm(1)); // a16 7141 } 7142 7143 //===----------------------------------------------------------------------===// 7144 // smrd 7145 //===----------------------------------------------------------------------===// 7146 7147 bool AMDGPUOperand::isSMRDOffset8() const { 7148 return isImm() && isUInt<8>(getImm()); 7149 } 7150 7151 bool AMDGPUOperand::isSMEMOffset() const { 7152 return isImm(); // Offset range is checked later by validator. 7153 } 7154 7155 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7156 // 32-bit literals are only supported on CI and we only want to use them 7157 // when the offset is > 8-bits. 
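// For example (illustrative): an offset of 0x1000 does not fit in 8 bits
// and therefore qualifies for the 32-bit literal form, while an offset of
// 0x10 fits in 8 bits and is rejected here in favor of the 8-bit encoding.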
7158 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7159 } 7160 7161 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7162 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7163 } 7164 7165 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7166 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7167 } 7168 7169 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7170 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7171 } 7172 7173 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7174 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7175 } 7176 7177 //===----------------------------------------------------------------------===// 7178 // vop3 7179 //===----------------------------------------------------------------------===// 7180 7181 static bool ConvertOmodMul(int64_t &Mul) { 7182 if (Mul != 1 && Mul != 2 && Mul != 4) 7183 return false; 7184 7185 Mul >>= 1; 7186 return true; 7187 } 7188 7189 static bool ConvertOmodDiv(int64_t &Div) { 7190 if (Div == 1) { 7191 Div = 0; 7192 return true; 7193 } 7194 7195 if (Div == 2) { 7196 Div = 3; 7197 return true; 7198 } 7199 7200 return false; 7201 } 7202 7203 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7204 // This is intentional and ensures compatibility with sp3. 7205 // See bug 35397 for details. 7206 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7207 if (BoundCtrl == 0 || BoundCtrl == 1) { 7208 BoundCtrl = 1; 7209 return true; 7210 } 7211 return false; 7212 } 7213 7214 // Note: the order in this table matches the order of operands in AsmString. 7215 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7216 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7217 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7218 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7219 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7220 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7221 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7222 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7223 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7224 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7225 {"", AMDGPUOperand::ImmTyCPol, false, nullptr}, 7226 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7227 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7228 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7229 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7230 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7231 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7232 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7233 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7234 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7235 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7236 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7237 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7238 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7239 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7240 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7241 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7242 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 7243 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 7244 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 7245 {"src0_sel", 
AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7246 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7247 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7248 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr},
7249 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7250 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7251 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7252 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7253 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7254 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7255 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7256 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7257 };
7258
7259 void AMDGPUAsmParser::onBeginOfFile() {
7260 if (!getParser().getStreamer().getTargetStreamer() ||
7261 getSTI().getTargetTriple().getArch() == Triple::r600)
7262 return;
7263
7264 if (!getTargetStreamer().getTargetID())
7265 getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
7266
7267 if (isHsaAbiVersion3Or4(&getSTI()))
7268 getTargetStreamer().EmitDirectiveAMDGCNTarget();
7269 }
7270
7271 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7272
7273 OperandMatchResultTy res = parseOptionalOpr(Operands);
7274
7275 // This is a hack to enable hardcoded mandatory operands which follow
7276 // optional operands.
7277 //
7278 // The current design assumes that all operands after the first optional operand
7279 // are also optional. However, the implementation of some instructions violates
7280 // this rule (see e.g. flat/global atomics, which have a hardcoded 'glc' operand).
7281 //
7282 // To alleviate this problem, we have to (implicitly) parse extra operands
7283 // to make sure the autogenerated parser of custom operands never hits a
7284 // hardcoded mandatory operand.
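// Keep parsing optional operands (at most MAX_OPR_LOOKAHEAD of them) until a
// parse fails or the end of the statement is reached.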
7285 7286 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 7287 if (res != MatchOperand_Success || 7288 isToken(AsmToken::EndOfStatement)) 7289 break; 7290 7291 trySkipToken(AsmToken::Comma); 7292 res = parseOptionalOpr(Operands); 7293 } 7294 7295 return res; 7296 } 7297 7298 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 7299 OperandMatchResultTy res; 7300 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 7301 // try to parse any optional operand here 7302 if (Op.IsBit) { 7303 res = parseNamedBit(Op.Name, Operands, Op.Type); 7304 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 7305 res = parseOModOperand(Operands); 7306 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 7307 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 7308 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 7309 res = parseSDWASel(Operands, Op.Name, Op.Type); 7310 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 7311 res = parseSDWADstUnused(Operands); 7312 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 7313 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 7314 Op.Type == AMDGPUOperand::ImmTyNegLo || 7315 Op.Type == AMDGPUOperand::ImmTyNegHi) { 7316 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 7317 Op.ConvertResult); 7318 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 7319 res = parseDim(Operands); 7320 } else if (Op.Type == AMDGPUOperand::ImmTyCPol) { 7321 res = parseCPol(Operands); 7322 } else { 7323 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 7324 } 7325 if (res != MatchOperand_NoMatch) { 7326 return res; 7327 } 7328 } 7329 return MatchOperand_NoMatch; 7330 } 7331 7332 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 7333 StringRef Name = getTokenStr(); 7334 if (Name == "mul") { 7335 return parseIntWithPrefix("mul", Operands, 7336 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 7337 } 7338 7339 if (Name == "div") { 7340 return parseIntWithPrefix("div", Operands, 7341 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7342 } 7343 7344 return MatchOperand_NoMatch; 7345 } 7346 7347 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7348 cvtVOP3P(Inst, Operands); 7349 7350 int Opc = Inst.getOpcode(); 7351 7352 int SrcNum; 7353 const int Ops[] = { AMDGPU::OpName::src0, 7354 AMDGPU::OpName::src1, 7355 AMDGPU::OpName::src2 }; 7356 for (SrcNum = 0; 7357 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7358 ++SrcNum); 7359 assert(SrcNum > 0); 7360 7361 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7362 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7363 7364 if ((OpSel & (1 << SrcNum)) != 0) { 7365 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7366 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7367 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7368 } 7369 } 7370 7371 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7372 // 1. This operand is input modifiers 7373 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7374 // 2. This is not last operand 7375 && Desc.NumOperands > (OpNum + 1) 7376 // 3. Next operand is register class 7377 && Desc.OpInfo[OpNum + 1].RegClass != -1 7378 // 4. 
Next register is not tied to any other operand 7379 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7380 } 7381 7382 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7383 { 7384 OptionalImmIndexMap OptionalIdx; 7385 unsigned Opc = Inst.getOpcode(); 7386 7387 unsigned I = 1; 7388 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7389 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7390 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7391 } 7392 7393 for (unsigned E = Operands.size(); I != E; ++I) { 7394 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7395 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7396 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7397 } else if (Op.isInterpSlot() || 7398 Op.isInterpAttr() || 7399 Op.isAttrChan()) { 7400 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7401 } else if (Op.isImmModifier()) { 7402 OptionalIdx[Op.getImmTy()] = I; 7403 } else { 7404 llvm_unreachable("unhandled operand type"); 7405 } 7406 } 7407 7408 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7410 } 7411 7412 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7413 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7414 } 7415 7416 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7417 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7418 } 7419 } 7420 7421 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7422 OptionalImmIndexMap &OptionalIdx) { 7423 unsigned Opc = Inst.getOpcode(); 7424 7425 unsigned I = 1; 7426 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7427 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7428 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7429 } 7430 7431 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7432 // This instruction has src modifiers 7433 for (unsigned E = Operands.size(); I != E; ++I) { 7434 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7435 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7436 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7437 } else if (Op.isImmModifier()) { 7438 OptionalIdx[Op.getImmTy()] = I; 7439 } else if (Op.isRegOrImm()) { 7440 Op.addRegOrImmOperands(Inst, 1); 7441 } else { 7442 llvm_unreachable("unhandled operand type"); 7443 } 7444 } 7445 } else { 7446 // No src modifiers 7447 for (unsigned E = Operands.size(); I != E; ++I) { 7448 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7449 if (Op.isMod()) { 7450 OptionalIdx[Op.getImmTy()] = I; 7451 } else { 7452 Op.addRegOrImmOperands(Inst, 1); 7453 } 7454 } 7455 } 7456 7457 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7459 } 7460 7461 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7462 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7463 } 7464 7465 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7466 // it has src2 register operand that is tied to dst operand 7467 // we don't allow modifiers for this operand in assembler so src2_modifiers 7468 // should be 0. 
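// For these opcodes we insert a zero src2_modifiers operand and then duplicate
// the dst operand as the tied src2.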
7469 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7470 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7471 Opc == AMDGPU::V_MAC_F32_e64_vi || 7472 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7473 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7474 Opc == AMDGPU::V_MAC_F16_e64_vi || 7475 Opc == AMDGPU::V_FMAC_F64_e64_gfx90a || 7476 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7477 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7478 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7479 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7480 auto it = Inst.begin(); 7481 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7482 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7483 ++it; 7484 // Copy the operand to ensure it's not invalidated when Inst grows. 7485 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7486 } 7487 } 7488 7489 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7490 OptionalImmIndexMap OptionalIdx; 7491 cvtVOP3(Inst, Operands, OptionalIdx); 7492 } 7493 7494 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7495 const OperandVector &Operands) { 7496 OptionalImmIndexMap OptIdx; 7497 const int Opc = Inst.getOpcode(); 7498 const MCInstrDesc &Desc = MII.get(Opc); 7499 7500 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7501 7502 cvtVOP3(Inst, Operands, OptIdx); 7503 7504 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7505 assert(!IsPacked); 7506 Inst.addOperand(Inst.getOperand(0)); 7507 } 7508 7509 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7510 // instruction, and then figure out where to actually put the modifiers 7511 7512 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7513 7514 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7515 if (OpSelHiIdx != -1) { 7516 int DefaultVal = IsPacked ? 
-1 : 0; 7517 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7518 DefaultVal); 7519 } 7520 7521 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7522 if (NegLoIdx != -1) { 7523 assert(IsPacked); 7524 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7525 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7526 } 7527 7528 const int Ops[] = { AMDGPU::OpName::src0, 7529 AMDGPU::OpName::src1, 7530 AMDGPU::OpName::src2 }; 7531 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7532 AMDGPU::OpName::src1_modifiers, 7533 AMDGPU::OpName::src2_modifiers }; 7534 7535 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7536 7537 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7538 unsigned OpSelHi = 0; 7539 unsigned NegLo = 0; 7540 unsigned NegHi = 0; 7541 7542 if (OpSelHiIdx != -1) { 7543 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7544 } 7545 7546 if (NegLoIdx != -1) { 7547 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7548 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7549 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7550 } 7551 7552 for (int J = 0; J < 3; ++J) { 7553 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7554 if (OpIdx == -1) 7555 break; 7556 7557 uint32_t ModVal = 0; 7558 7559 if ((OpSel & (1 << J)) != 0) 7560 ModVal |= SISrcMods::OP_SEL_0; 7561 7562 if ((OpSelHi & (1 << J)) != 0) 7563 ModVal |= SISrcMods::OP_SEL_1; 7564 7565 if ((NegLo & (1 << J)) != 0) 7566 ModVal |= SISrcMods::NEG; 7567 7568 if ((NegHi & (1 << J)) != 0) 7569 ModVal |= SISrcMods::NEG_HI; 7570 7571 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7572 7573 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7574 } 7575 } 7576 7577 //===----------------------------------------------------------------------===// 7578 // dpp 7579 //===----------------------------------------------------------------------===// 7580 7581 bool AMDGPUOperand::isDPP8() const { 7582 return isImmTy(ImmTyDPP8); 7583 } 7584 7585 bool AMDGPUOperand::isDPPCtrl() const { 7586 using namespace AMDGPU::DPP; 7587 7588 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7589 if (result) { 7590 int64_t Imm = getImm(); 7591 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7592 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7593 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7594 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7595 (Imm == DppCtrl::WAVE_SHL1) || 7596 (Imm == DppCtrl::WAVE_ROL1) || 7597 (Imm == DppCtrl::WAVE_SHR1) || 7598 (Imm == DppCtrl::WAVE_ROR1) || 7599 (Imm == DppCtrl::ROW_MIRROR) || 7600 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7601 (Imm == DppCtrl::BCAST15) || 7602 (Imm == DppCtrl::BCAST31) || 7603 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7604 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7605 } 7606 return false; 7607 } 7608 7609 //===----------------------------------------------------------------------===// 7610 // mAI 7611 //===----------------------------------------------------------------------===// 7612 7613 bool AMDGPUOperand::isBLGP() const { 7614 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7615 } 7616 7617 bool AMDGPUOperand::isCBSZ() const { 7618 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7619 } 7620 7621 bool AMDGPUOperand::isABID() 
const { 7622 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7623 } 7624 7625 bool AMDGPUOperand::isS16Imm() const { 7626 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7627 } 7628 7629 bool AMDGPUOperand::isU16Imm() const { 7630 return isImm() && isUInt<16>(getImm()); 7631 } 7632 7633 //===----------------------------------------------------------------------===// 7634 // dim 7635 //===----------------------------------------------------------------------===// 7636 7637 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7638 // We want to allow "dim:1D" etc., 7639 // but the initial 1 is tokenized as an integer. 7640 std::string Token; 7641 if (isToken(AsmToken::Integer)) { 7642 SMLoc Loc = getToken().getEndLoc(); 7643 Token = std::string(getTokenStr()); 7644 lex(); 7645 if (getLoc() != Loc) 7646 return false; 7647 } 7648 7649 StringRef Suffix; 7650 if (!parseId(Suffix)) 7651 return false; 7652 Token += Suffix; 7653 7654 StringRef DimId = Token; 7655 if (DimId.startswith("SQ_RSRC_IMG_")) 7656 DimId = DimId.drop_front(12); 7657 7658 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7659 if (!DimInfo) 7660 return false; 7661 7662 Encoding = DimInfo->Encoding; 7663 return true; 7664 } 7665 7666 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7667 if (!isGFX10Plus()) 7668 return MatchOperand_NoMatch; 7669 7670 SMLoc S = getLoc(); 7671 7672 if (!trySkipId("dim", AsmToken::Colon)) 7673 return MatchOperand_NoMatch; 7674 7675 unsigned Encoding; 7676 SMLoc Loc = getLoc(); 7677 if (!parseDimId(Encoding)) { 7678 Error(Loc, "invalid dim value"); 7679 return MatchOperand_ParseFail; 7680 } 7681 7682 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7683 AMDGPUOperand::ImmTyDim)); 7684 return MatchOperand_Success; 7685 } 7686 7687 //===----------------------------------------------------------------------===// 7688 // dpp 7689 //===----------------------------------------------------------------------===// 7690 7691 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7692 SMLoc S = getLoc(); 7693 7694 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7695 return MatchOperand_NoMatch; 7696 7697 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7698 7699 int64_t Sels[8]; 7700 7701 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7702 return MatchOperand_ParseFail; 7703 7704 for (size_t i = 0; i < 8; ++i) { 7705 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7706 return MatchOperand_ParseFail; 7707 7708 SMLoc Loc = getLoc(); 7709 if (getParser().parseAbsoluteExpression(Sels[i])) 7710 return MatchOperand_ParseFail; 7711 if (0 > Sels[i] || 7 < Sels[i]) { 7712 Error(Loc, "expected a 3-bit value"); 7713 return MatchOperand_ParseFail; 7714 } 7715 } 7716 7717 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7718 return MatchOperand_ParseFail; 7719 7720 unsigned DPP8 = 0; 7721 for (size_t i = 0; i < 8; ++i) 7722 DPP8 |= (Sels[i] << (i * 3)); 7723 7724 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7725 return MatchOperand_Success; 7726 } 7727 7728 bool 7729 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7730 const OperandVector &Operands) { 7731 if (Ctrl == "row_newbcast") 7732 return isGFX90A(); 7733 7734 // DPP64 is supported for row_newbcast only. 
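// Every other control is rejected below if the first register operand is wider
// than 32 bits (i.e. it has a sub1 subregister).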
7735 const MCRegisterInfo *MRI = getMRI(); 7736 if (Operands.size() > 2 && Operands[1]->isReg() && 7737 MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1)) 7738 return false; 7739 7740 if (Ctrl == "row_share" || 7741 Ctrl == "row_xmask") 7742 return isGFX10Plus(); 7743 7744 if (Ctrl == "wave_shl" || 7745 Ctrl == "wave_shr" || 7746 Ctrl == "wave_rol" || 7747 Ctrl == "wave_ror" || 7748 Ctrl == "row_bcast") 7749 return isVI() || isGFX9(); 7750 7751 return Ctrl == "row_mirror" || 7752 Ctrl == "row_half_mirror" || 7753 Ctrl == "quad_perm" || 7754 Ctrl == "row_shl" || 7755 Ctrl == "row_shr" || 7756 Ctrl == "row_ror"; 7757 } 7758 7759 int64_t 7760 AMDGPUAsmParser::parseDPPCtrlPerm() { 7761 // quad_perm:[%d,%d,%d,%d] 7762 7763 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7764 return -1; 7765 7766 int64_t Val = 0; 7767 for (int i = 0; i < 4; ++i) { 7768 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7769 return -1; 7770 7771 int64_t Temp; 7772 SMLoc Loc = getLoc(); 7773 if (getParser().parseAbsoluteExpression(Temp)) 7774 return -1; 7775 if (Temp < 0 || Temp > 3) { 7776 Error(Loc, "expected a 2-bit value"); 7777 return -1; 7778 } 7779 7780 Val += (Temp << i * 2); 7781 } 7782 7783 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7784 return -1; 7785 7786 return Val; 7787 } 7788 7789 int64_t 7790 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7791 using namespace AMDGPU::DPP; 7792 7793 // sel:%d 7794 7795 int64_t Val; 7796 SMLoc Loc = getLoc(); 7797 7798 if (getParser().parseAbsoluteExpression(Val)) 7799 return -1; 7800 7801 struct DppCtrlCheck { 7802 int64_t Ctrl; 7803 int Lo; 7804 int Hi; 7805 }; 7806 7807 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7808 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7809 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7810 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7811 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7812 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7813 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7814 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7815 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7816 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7817 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7818 .Default({-1, 0, 0}); 7819 7820 bool Valid; 7821 if (Check.Ctrl == -1) { 7822 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7823 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7824 } else { 7825 Valid = Check.Lo <= Val && Val <= Check.Hi; 7826 Val = (Check.Lo == Check.Hi) ? 
Check.Ctrl : (Check.Ctrl | Val); 7827 } 7828 7829 if (!Valid) { 7830 Error(Loc, Twine("invalid ", Ctrl) + Twine(" value")); 7831 return -1; 7832 } 7833 7834 return Val; 7835 } 7836 7837 OperandMatchResultTy 7838 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7839 using namespace AMDGPU::DPP; 7840 7841 if (!isToken(AsmToken::Identifier) || 7842 !isSupportedDPPCtrl(getTokenStr(), Operands)) 7843 return MatchOperand_NoMatch; 7844 7845 SMLoc S = getLoc(); 7846 int64_t Val = -1; 7847 StringRef Ctrl; 7848 7849 parseId(Ctrl); 7850 7851 if (Ctrl == "row_mirror") { 7852 Val = DppCtrl::ROW_MIRROR; 7853 } else if (Ctrl == "row_half_mirror") { 7854 Val = DppCtrl::ROW_HALF_MIRROR; 7855 } else { 7856 if (skipToken(AsmToken::Colon, "expected a colon")) { 7857 if (Ctrl == "quad_perm") { 7858 Val = parseDPPCtrlPerm(); 7859 } else { 7860 Val = parseDPPCtrlSel(Ctrl); 7861 } 7862 } 7863 } 7864 7865 if (Val == -1) 7866 return MatchOperand_ParseFail; 7867 7868 Operands.push_back( 7869 AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl)); 7870 return MatchOperand_Success; 7871 } 7872 7873 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7874 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7875 } 7876 7877 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7878 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7879 } 7880 7881 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7882 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7883 } 7884 7885 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7886 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7887 } 7888 7889 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7890 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7891 } 7892 7893 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7894 OptionalImmIndexMap OptionalIdx; 7895 7896 unsigned I = 1; 7897 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7898 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7899 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7900 } 7901 7902 int Fi = 0; 7903 for (unsigned E = Operands.size(); I != E; ++I) { 7904 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7905 MCOI::TIED_TO); 7906 if (TiedTo != -1) { 7907 assert((unsigned)TiedTo < Inst.getNumOperands()); 7908 // handle tied old or src2 for MAC instructions 7909 Inst.addOperand(Inst.getOperand(TiedTo)); 7910 } 7911 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7912 // Add the register arguments 7913 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7914 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7915 // Skip it. 
7916 continue; 7917 } 7918 7919 if (IsDPP8) { 7920 if (Op.isDPP8()) { 7921 Op.addImmOperands(Inst, 1); 7922 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7923 Op.addRegWithFPInputModsOperands(Inst, 2); 7924 } else if (Op.isFI()) { 7925 Fi = Op.getImm(); 7926 } else if (Op.isReg()) { 7927 Op.addRegOperands(Inst, 1); 7928 } else { 7929 llvm_unreachable("Invalid operand type"); 7930 } 7931 } else { 7932 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7933 Op.addRegWithFPInputModsOperands(Inst, 2); 7934 } else if (Op.isDPPCtrl()) { 7935 Op.addImmOperands(Inst, 1); 7936 } else if (Op.isImm()) { 7937 // Handle optional arguments 7938 OptionalIdx[Op.getImmTy()] = I; 7939 } else { 7940 llvm_unreachable("Invalid operand type"); 7941 } 7942 } 7943 } 7944 7945 if (IsDPP8) { 7946 using namespace llvm::AMDGPU::DPP; 7947 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7948 } else { 7949 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7952 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7953 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7954 } 7955 } 7956 } 7957 7958 //===----------------------------------------------------------------------===// 7959 // sdwa 7960 //===----------------------------------------------------------------------===// 7961 7962 OperandMatchResultTy 7963 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7964 AMDGPUOperand::ImmTy Type) { 7965 using namespace llvm::AMDGPU::SDWA; 7966 7967 SMLoc S = getLoc(); 7968 StringRef Value; 7969 OperandMatchResultTy res; 7970 7971 SMLoc StringLoc; 7972 res = parseStringWithPrefix(Prefix, Value, StringLoc); 7973 if (res != MatchOperand_Success) { 7974 return res; 7975 } 7976 7977 int64_t Int; 7978 Int = StringSwitch<int64_t>(Value) 7979 .Case("BYTE_0", SdwaSel::BYTE_0) 7980 .Case("BYTE_1", SdwaSel::BYTE_1) 7981 .Case("BYTE_2", SdwaSel::BYTE_2) 7982 .Case("BYTE_3", SdwaSel::BYTE_3) 7983 .Case("WORD_0", SdwaSel::WORD_0) 7984 .Case("WORD_1", SdwaSel::WORD_1) 7985 .Case("DWORD", SdwaSel::DWORD) 7986 .Default(0xffffffff); 7987 7988 if (Int == 0xffffffff) { 7989 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 7990 return MatchOperand_ParseFail; 7991 } 7992 7993 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7994 return MatchOperand_Success; 7995 } 7996 7997 OperandMatchResultTy 7998 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7999 using namespace llvm::AMDGPU::SDWA; 8000 8001 SMLoc S = getLoc(); 8002 StringRef Value; 8003 OperandMatchResultTy res; 8004 8005 SMLoc StringLoc; 8006 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 8007 if (res != MatchOperand_Success) { 8008 return res; 8009 } 8010 8011 int64_t Int; 8012 Int = StringSwitch<int64_t>(Value) 8013 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 8014 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 8015 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 8016 .Default(0xffffffff); 8017 8018 if (Int == 0xffffffff) { 8019 Error(StringLoc, "invalid dst_unused value"); 8020 return MatchOperand_ParseFail; 8021 } 8022 8023 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 8024 return MatchOperand_Success; 8025 } 8026 8027 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 8028 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 8029 } 8030 8031 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 8032 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 8033 } 8034 8035 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 8036 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 8037 } 8038 8039 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 8040 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 8041 } 8042 8043 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 8044 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 8045 } 8046 8047 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 8048 uint64_t BasicInstType, 8049 bool SkipDstVcc, 8050 bool SkipSrcVcc) { 8051 using namespace llvm::AMDGPU::SDWA; 8052 8053 OptionalImmIndexMap OptionalIdx; 8054 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 8055 bool SkippedVcc = false; 8056 8057 unsigned I = 1; 8058 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 8059 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 8060 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 8061 } 8062 8063 for (unsigned E = Operands.size(); I != E; ++I) { 8064 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 8065 if (SkipVcc && !SkippedVcc && Op.isReg() && 8066 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 8067 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 8068 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 8069 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 8070 // Skip VCC only if we didn't skip it on previous iteration. 8071 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
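// Hence the vcc dst is skipped right after the vdst has been added (1 MCOperand
// so far), the vcc src after vdst, src0 and src1 (1 + 2 + 2 = 5 MCOperands),
// and the VOPC vcc dst before any operand has been added (0 MCOperands).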
8072 if (BasicInstType == SIInstrFlags::VOP2 &&
8073 ((SkipDstVcc && Inst.getNumOperands() == 1) ||
8074 (SkipSrcVcc && Inst.getNumOperands() == 5))) {
8075 SkippedVcc = true;
8076 continue;
8077 } else if (BasicInstType == SIInstrFlags::VOPC &&
8078 Inst.getNumOperands() == 0) {
8079 SkippedVcc = true;
8080 continue;
8081 }
8082 }
8083 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
8084 Op.addRegOrImmWithInputModsOperands(Inst, 2);
8085 } else if (Op.isImm()) {
8086 // Handle optional arguments
8087 OptionalIdx[Op.getImmTy()] = I;
8088 } else {
8089 llvm_unreachable("Invalid operand type");
8090 }
8091 SkippedVcc = false;
8092 }
8093
8094 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
8095 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
8096 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
8097 // V_NOP_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
8098 switch (BasicInstType) {
8099 case SIInstrFlags::VOP1:
8100 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8101 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8102 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8103 }
8104 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8105 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8106 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8107 break;
8108
8109 case SIInstrFlags::VOP2:
8110 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8111 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
8112 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
8113 }
8114 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
8115 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
8116 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8117 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8118 break;
8119
8120 case SIInstrFlags::VOPC:
8121 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
8122 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
8123 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
8124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
8125 break;
8126
8127 default:
8128 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
8129 }
8130 }
8131
8132 // Special case for v_mac_{f16, f32}:
8133 // it has a src2 register operand that is tied to the dst operand.
8134 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
8135 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
8136 auto it = Inst.begin();
8137 std::advance(
8138 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
8139 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
8140 }
8141 }
8142
8143 //===----------------------------------------------------------------------===//
8144 // mAI
8145 //===----------------------------------------------------------------------===//
8146
8147 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
8148 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
8149 }
8150
8151 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
8152 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
8153 }
8154
8155 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
8156 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
8157 }
8158
8159 /// Force static initialization.
8160 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
8161 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
8162 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
8163 }
8164
8165 #define GET_REGISTER_MATCHER
8166 #define GET_MATCHER_IMPLEMENTATION
8167 #define GET_MNEMONIC_SPELL_CHECKER
8168 #define GET_MNEMONIC_CHECKER
8169 #include "AMDGPUGenAsmMatcher.inc"
8170
8171 // This function should be defined after the auto-generated include so that we
8172 // have the MatchClassKind enum defined.
8173 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
8174 unsigned Kind) {
8175 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
8176 // But MatchInstructionImpl() expects to meet a token and fails to validate
8177 // the operand. This method checks if we were given an immediate operand but
8178 // expected the corresponding token.
8179 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
8180 switch (Kind) {
8181 case MCK_addr64:
8182 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
8183 case MCK_gds:
8184 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
8185 case MCK_lds:
8186 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
8187 case MCK_idxen:
8188 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
8189 case MCK_offen:
8190 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
8191 case MCK_SSrcB32:
8192 // When operands have expression values, they will return true for isToken,
8193 // because it is not possible to distinguish between a token and an
8194 // expression at parse time. MatchInstructionImpl() will always try to
8195 // match an operand as a token, when isToken returns true, and when the
8196 // name of the expression is not a valid token, the match will fail,
8197 // so we need to handle it here.
8198 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
8199 case MCK_SSrcF32:
8200 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
8201 case MCK_SoppBrTarget:
8202 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
8203 case MCK_VReg32OrOff:
8204 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
8205 case MCK_InterpSlot:
8206 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 8207 case MCK_Attr: 8208 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8209 case MCK_AttrChan: 8210 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8211 case MCK_ImmSMEMOffset: 8212 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8213 case MCK_SReg_64: 8214 case MCK_SReg_64_XEXEC: 8215 // Null is defined as a 32-bit register but 8216 // it should also be enabled with 64-bit operands. 8217 // The following code enables it for SReg_64 operands 8218 // used as source and destination. Remaining source 8219 // operands are handled in isInlinableImm. 8220 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8221 default: 8222 return Match_InvalidOperand; 8223 } 8224 } 8225 8226 //===----------------------------------------------------------------------===// 8227 // endpgm 8228 //===----------------------------------------------------------------------===// 8229 8230 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8231 SMLoc S = getLoc(); 8232 int64_t Imm = 0; 8233 8234 if (!parseExpr(Imm)) { 8235 // The operand is optional, if not present default to 0 8236 Imm = 0; 8237 } 8238 8239 if (!isUInt<16>(Imm)) { 8240 Error(S, "expected a 16-bit value"); 8241 return MatchOperand_ParseFail; 8242 } 8243 8244 Operands.push_back( 8245 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8246 return MatchOperand_Success; 8247 } 8248 8249 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8250