//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced
  // value of the GLC operand.
  bool isGLC_1() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

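  // Editorial note (not from the original source): the *Src* predicates in
  // this section follow a naming scheme that can be read directly off their
  // bodies:
  //   SCSrc*  - SGPR or inline constant, no modifiers
  //   SSrc*   - SCSrc plus a literal (and, for 32-bit forms, an expression)
  //   VCSrc*  - VGPR/SGPR or inline constant, no modifiers
  //   VSrc*   - VCSrc plus a literal
  //   VISrc*  - VGPR or inline constant
  //   AISrc*  - AGPR or inline constant (AISrc_128/512/1024 use wider tuples)
  // The B16/B32/B64/F16/F32/F64 suffix names the MVT the operand is matched
  // against.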
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

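  // Illustrative usage (editorial addition, not from the original source):
  // the parser populates its OperandVector exclusively through these static
  // factories, e.g.
  //   Operands.push_back(AMDGPUOperand::CreateImm(this, Val, Loc));
  //   Operands.push_back(AMDGPUOperand::CreateToken(this, Tok, Loc));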
  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  ///        descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for specific
      // targets.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

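  // Editorial note (not from the original source): the subtarget predicates
  // above are thin wrappers over AMDGPUBaseInfo queries and the current
  // subtarget's feature bits; the operand predicates and instruction
  // validators use them to gate generation- and encoding-specific checks.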
  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;

  std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                        SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands,
                                bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultGLC_1() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
"clamp" attribute is not) 1627 return false; 1628 } 1629 // TODO: We should avoid using host float here. It would be better to 1630 // check the float bit values which is what a few other places do. 1631 // We've had bot failures before due to weird NaN support on mips hosts. 1632 1633 APInt Literal(64, Imm.Val); 1634 1635 if (Imm.IsFPImm) { // We got fp literal token 1636 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1637 return AMDGPU::isInlinableLiteral64(Imm.Val, 1638 AsmParser->hasInv2PiInlineImm()); 1639 } 1640 1641 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1642 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1643 return false; 1644 1645 if (type.getScalarSizeInBits() == 16) { 1646 return isInlineableLiteralOp16( 1647 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1648 type, AsmParser->hasInv2PiInlineImm()); 1649 } 1650 1651 // Check if single precision literal is inlinable 1652 return AMDGPU::isInlinableLiteral32( 1653 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1654 AsmParser->hasInv2PiInlineImm()); 1655 } 1656 1657 // We got int literal token. 1658 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1659 return AMDGPU::isInlinableLiteral64(Imm.Val, 1660 AsmParser->hasInv2PiInlineImm()); 1661 } 1662 1663 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1664 return false; 1665 } 1666 1667 if (type.getScalarSizeInBits() == 16) { 1668 return isInlineableLiteralOp16( 1669 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1670 type, AsmParser->hasInv2PiInlineImm()); 1671 } 1672 1673 return AMDGPU::isInlinableLiteral32( 1674 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1675 AsmParser->hasInv2PiInlineImm()); 1676 } 1677 1678 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1679 // Check that this immediate can be added as literal 1680 if (!isImmTy(ImmTyNone)) { 1681 return false; 1682 } 1683 1684 if (!Imm.IsFPImm) { 1685 // We got int literal token. 1686 1687 if (type == MVT::f64 && hasFPModifiers()) { 1688 // Cannot apply fp modifiers to int literals preserving the same semantics 1689 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1690 // disable these cases. 1691 return false; 1692 } 1693 1694 unsigned Size = type.getSizeInBits(); 1695 if (Size == 64) 1696 Size = 32; 1697 1698 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1699 // types. 1700 return isSafeTruncation(Imm.Val, Size); 1701 } 1702 1703 // We got fp literal token 1704 if (type == MVT::f64) { // Expected 64-bit fp operand 1705 // We would set low 64-bits of literal to zeroes but we accept this literals 1706 return true; 1707 } 1708 1709 if (type == MVT::i64) { // Expected 64-bit int operand 1710 // We don't allow fp literals in 64-bit integer instructions. It is 1711 // unclear how we should encode them. 1712 return false; 1713 } 1714 1715 // We allow fp literals with f16x2 operands assuming that the specified 1716 // literal goes into the lower half and the upper half is zero. We also 1717 // require that the literal may be losslesly converted to f16. 1718 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1719 (type == MVT::v2i16)? 
MVT::i16 : type; 1720 1721 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1722 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1723 } 1724 1725 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1726 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1727 } 1728 1729 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1730 if (AsmParser->isVI()) 1731 return isVReg32(); 1732 else if (AsmParser->isGFX9Plus()) 1733 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1734 else 1735 return false; 1736 } 1737 1738 bool AMDGPUOperand::isSDWAFP16Operand() const { 1739 return isSDWAOperand(MVT::f16); 1740 } 1741 1742 bool AMDGPUOperand::isSDWAFP32Operand() const { 1743 return isSDWAOperand(MVT::f32); 1744 } 1745 1746 bool AMDGPUOperand::isSDWAInt16Operand() const { 1747 return isSDWAOperand(MVT::i16); 1748 } 1749 1750 bool AMDGPUOperand::isSDWAInt32Operand() const { 1751 return isSDWAOperand(MVT::i32); 1752 } 1753 1754 bool AMDGPUOperand::isBoolReg() const { 1755 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1756 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1757 } 1758 1759 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1760 { 1761 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1762 assert(Size == 2 || Size == 4 || Size == 8); 1763 1764 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1765 1766 if (Imm.Mods.Abs) { 1767 Val &= ~FpSignMask; 1768 } 1769 if (Imm.Mods.Neg) { 1770 Val ^= FpSignMask; 1771 } 1772 1773 return Val; 1774 } 1775 1776 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1777 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1778 Inst.getNumOperands())) { 1779 addLiteralImmOperand(Inst, Imm.Val, 1780 ApplyModifiers & 1781 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1782 } else { 1783 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1784 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1785 setImmKindNone(); 1786 } 1787 } 1788 1789 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1790 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1791 auto OpNum = Inst.getNumOperands(); 1792 // Check that this operand accepts literals 1793 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1794 1795 if (ApplyModifiers) { 1796 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1797 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1798 Val = applyInputFPModifiers(Val, Size); 1799 } 1800 1801 APInt Literal(64, Val); 1802 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1803 1804 if (Imm.IsFPImm) { // We got fp literal token 1805 switch (OpTy) { 1806 case AMDGPU::OPERAND_REG_IMM_INT64: 1807 case AMDGPU::OPERAND_REG_IMM_FP64: 1808 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1809 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1810 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1811 AsmParser->hasInv2PiInlineImm())) { 1812 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1813 setImmKindConst(); 1814 return; 1815 } 1816 1817 // Non-inlineable 1818 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1819 // For fp operands we check if low 32 bits are zeros 1820 if (Literal.getLoBits(32) != 0) { 1821 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1822 "Can't encode literal as exact 64-bit floating-point operand. " 1823 "Low 32-bits will be set to zero"); 1824 } 1825 1826 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1827 setImmKindLiteral(); 1828 return; 1829 } 1830 1831 // We don't allow fp literals in 64-bit integer instructions. It is 1832 // unclear how we should encode them. This case should be checked earlier 1833 // in predicate methods (isLiteralImm()) 1834 llvm_unreachable("fp literal in 64-bit integer instruction."); 1835 1836 case AMDGPU::OPERAND_REG_IMM_INT32: 1837 case AMDGPU::OPERAND_REG_IMM_FP32: 1838 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1839 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1840 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1841 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1842 case AMDGPU::OPERAND_REG_IMM_INT16: 1843 case AMDGPU::OPERAND_REG_IMM_FP16: 1844 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1845 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1847 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1849 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1850 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1851 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1852 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1853 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1854 bool lost; 1855 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1856 // Convert literal to single precision 1857 FPLiteral.convert(*getOpFltSemantics(OpTy), 1858 APFloat::rmNearestTiesToEven, &lost); 1859 // We allow precision lost but not overflow or underflow. This should be 1860 // checked earlier in isLiteralImm() 1861 1862 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1863 Inst.addOperand(MCOperand::createImm(ImmVal)); 1864 setImmKindLiteral(); 1865 return; 1866 } 1867 default: 1868 llvm_unreachable("invalid operand size"); 1869 } 1870 1871 return; 1872 } 1873 1874 // We got int literal token. 1875 // Only sign extend inline immediates. 
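// Illustrative examples (informal, not exhaustive): with a 16-bit integer operand, a token of -1 is an inline constant and is emitted as is, while -200 is not inlinable and is emitted as the truncated 16-bit literal 0xff38 rather than being sign extended.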
1876 switch (OpTy) { 1877 case AMDGPU::OPERAND_REG_IMM_INT32: 1878 case AMDGPU::OPERAND_REG_IMM_FP32: 1879 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1880 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1882 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1883 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1884 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1885 if (isSafeTruncation(Val, 32) && 1886 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1887 AsmParser->hasInv2PiInlineImm())) { 1888 Inst.addOperand(MCOperand::createImm(Val)); 1889 setImmKindConst(); 1890 return; 1891 } 1892 1893 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1894 setImmKindLiteral(); 1895 return; 1896 1897 case AMDGPU::OPERAND_REG_IMM_INT64: 1898 case AMDGPU::OPERAND_REG_IMM_FP64: 1899 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1900 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1901 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1902 Inst.addOperand(MCOperand::createImm(Val)); 1903 setImmKindConst(); 1904 return; 1905 } 1906 1907 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1908 setImmKindLiteral(); 1909 return; 1910 1911 case AMDGPU::OPERAND_REG_IMM_INT16: 1912 case AMDGPU::OPERAND_REG_IMM_FP16: 1913 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1914 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1915 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1916 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1917 if (isSafeTruncation(Val, 16) && 1918 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1919 AsmParser->hasInv2PiInlineImm())) { 1920 Inst.addOperand(MCOperand::createImm(Val)); 1921 setImmKindConst(); 1922 return; 1923 } 1924 1925 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1926 setImmKindLiteral(); 1927 return; 1928 1929 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1930 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1931 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1932 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1933 assert(isSafeTruncation(Val, 16)); 1934 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1935 AsmParser->hasInv2PiInlineImm())); 1936 1937 Inst.addOperand(MCOperand::createImm(Val)); 1938 return; 1939 } 1940 default: 1941 llvm_unreachable("invalid operand size"); 1942 } 1943 } 1944 1945 template <unsigned Bitwidth> 1946 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1947 APInt Literal(64, Imm.Val); 1948 setImmKindNone(); 1949 1950 if (!Imm.IsFPImm) { 1951 // We got int literal token. 
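// E.g. for a 32-bit k-imm operand (such as the constant of v_madmk_f32), an integer token like 0x3f800000 is encoded as its low 32 bits, unchanged.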
1952 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1953 return; 1954 } 1955 1956 bool Lost; 1957 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1958 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1959 APFloat::rmNearestTiesToEven, &Lost); 1960 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1961 } 1962 1963 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1964 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1965 } 1966 1967 static bool isInlineValue(unsigned Reg) { 1968 switch (Reg) { 1969 case AMDGPU::SRC_SHARED_BASE: 1970 case AMDGPU::SRC_SHARED_LIMIT: 1971 case AMDGPU::SRC_PRIVATE_BASE: 1972 case AMDGPU::SRC_PRIVATE_LIMIT: 1973 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1974 return true; 1975 case AMDGPU::SRC_VCCZ: 1976 case AMDGPU::SRC_EXECZ: 1977 case AMDGPU::SRC_SCC: 1978 return true; 1979 case AMDGPU::SGPR_NULL: 1980 return true; 1981 default: 1982 return false; 1983 } 1984 } 1985 1986 bool AMDGPUOperand::isInlineValue() const { 1987 return isRegKind() && ::isInlineValue(getReg()); 1988 } 1989 1990 //===----------------------------------------------------------------------===// 1991 // AsmParser 1992 //===----------------------------------------------------------------------===// 1993 1994 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1995 if (Is == IS_VGPR) { 1996 switch (RegWidth) { 1997 default: return -1; 1998 case 1: return AMDGPU::VGPR_32RegClassID; 1999 case 2: return AMDGPU::VReg_64RegClassID; 2000 case 3: return AMDGPU::VReg_96RegClassID; 2001 case 4: return AMDGPU::VReg_128RegClassID; 2002 case 5: return AMDGPU::VReg_160RegClassID; 2003 case 6: return AMDGPU::VReg_192RegClassID; 2004 case 8: return AMDGPU::VReg_256RegClassID; 2005 case 16: return AMDGPU::VReg_512RegClassID; 2006 case 32: return AMDGPU::VReg_1024RegClassID; 2007 } 2008 } else if (Is == IS_TTMP) { 2009 switch (RegWidth) { 2010 default: return -1; 2011 case 1: return AMDGPU::TTMP_32RegClassID; 2012 case 2: return AMDGPU::TTMP_64RegClassID; 2013 case 4: return AMDGPU::TTMP_128RegClassID; 2014 case 8: return AMDGPU::TTMP_256RegClassID; 2015 case 16: return AMDGPU::TTMP_512RegClassID; 2016 } 2017 } else if (Is == IS_SGPR) { 2018 switch (RegWidth) { 2019 default: return -1; 2020 case 1: return AMDGPU::SGPR_32RegClassID; 2021 case 2: return AMDGPU::SGPR_64RegClassID; 2022 case 3: return AMDGPU::SGPR_96RegClassID; 2023 case 4: return AMDGPU::SGPR_128RegClassID; 2024 case 5: return AMDGPU::SGPR_160RegClassID; 2025 case 6: return AMDGPU::SGPR_192RegClassID; 2026 case 8: return AMDGPU::SGPR_256RegClassID; 2027 case 16: return AMDGPU::SGPR_512RegClassID; 2028 } 2029 } else if (Is == IS_AGPR) { 2030 switch (RegWidth) { 2031 default: return -1; 2032 case 1: return AMDGPU::AGPR_32RegClassID; 2033 case 2: return AMDGPU::AReg_64RegClassID; 2034 case 3: return AMDGPU::AReg_96RegClassID; 2035 case 4: return AMDGPU::AReg_128RegClassID; 2036 case 5: return AMDGPU::AReg_160RegClassID; 2037 case 6: return AMDGPU::AReg_192RegClassID; 2038 case 8: return AMDGPU::AReg_256RegClassID; 2039 case 16: return AMDGPU::AReg_512RegClassID; 2040 case 32: return AMDGPU::AReg_1024RegClassID; 2041 } 2042 } 2043 return -1; 2044 } 2045 2046 static unsigned getSpecialRegForName(StringRef RegName) { 2047 return StringSwitch<unsigned>(RegName) 2048 .Case("exec", AMDGPU::EXEC) 2049 .Case("vcc", AMDGPU::VCC) 2050 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2051 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2052 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2053 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2054 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2055 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2056 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2057 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2058 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2059 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2060 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2061 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2062 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2063 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2064 .Case("m0", AMDGPU::M0) 2065 .Case("vccz", AMDGPU::SRC_VCCZ) 2066 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2067 .Case("execz", AMDGPU::SRC_EXECZ) 2068 .Case("src_execz", AMDGPU::SRC_EXECZ) 2069 .Case("scc", AMDGPU::SRC_SCC) 2070 .Case("src_scc", AMDGPU::SRC_SCC) 2071 .Case("tba", AMDGPU::TBA) 2072 .Case("tma", AMDGPU::TMA) 2073 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2074 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2075 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2076 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2077 .Case("vcc_lo", AMDGPU::VCC_LO) 2078 .Case("vcc_hi", AMDGPU::VCC_HI) 2079 .Case("exec_lo", AMDGPU::EXEC_LO) 2080 .Case("exec_hi", AMDGPU::EXEC_HI) 2081 .Case("tma_lo", AMDGPU::TMA_LO) 2082 .Case("tma_hi", AMDGPU::TMA_HI) 2083 .Case("tba_lo", AMDGPU::TBA_LO) 2084 .Case("tba_hi", AMDGPU::TBA_HI) 2085 .Case("pc", AMDGPU::PC_REG) 2086 .Case("null", AMDGPU::SGPR_NULL) 2087 .Default(AMDGPU::NoRegister); 2088 } 2089 2090 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2091 SMLoc &EndLoc, bool RestoreOnFailure) { 2092 auto R = parseRegister(); 2093 if (!R) return true; 2094 assert(R->isReg()); 2095 RegNo = R->getReg(); 2096 StartLoc = R->getStartLoc(); 2097 EndLoc = R->getEndLoc(); 2098 return false; 2099 } 2100 2101 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2102 SMLoc &EndLoc) { 2103 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2104 } 2105 2106 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2107 SMLoc &StartLoc, 2108 SMLoc &EndLoc) { 2109 bool Result = 2110 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2111 bool PendingErrors = getParser().hasPendingError(); 2112 getParser().clearPendingErrors(); 2113 if (PendingErrors) 2114 return MatchOperand_ParseFail; 2115 if (Result) 2116 return MatchOperand_NoMatch; 2117 return MatchOperand_Success; 2118 } 2119 2120 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2121 RegisterKind RegKind, unsigned Reg1, 2122 SMLoc Loc) { 2123 switch (RegKind) { 2124 case IS_SPECIAL: 2125 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2126 Reg = AMDGPU::EXEC; 2127 RegWidth = 2; 2128 return true; 2129 } 2130 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2131 Reg = AMDGPU::FLAT_SCR; 2132 RegWidth = 2; 2133 return true; 2134 } 2135 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2136 Reg = AMDGPU::XNACK_MASK; 2137 RegWidth = 2; 2138 return true; 2139 } 2140 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2141 Reg = AMDGPU::VCC; 2142 RegWidth = 2; 2143 return true; 2144 } 2145 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2146 Reg = AMDGPU::TBA; 2147 RegWidth = 2; 2148 return true; 2149 } 2150 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2151 Reg = AMDGPU::TMA; 2152 
RegWidth = 2; 2153 return true; 2154 } 2155 Error(Loc, "register does not fit in the list"); 2156 return false; 2157 case IS_VGPR: 2158 case IS_SGPR: 2159 case IS_AGPR: 2160 case IS_TTMP: 2161 if (Reg1 != Reg + RegWidth) { 2162 Error(Loc, "registers in a list must have consecutive indices"); 2163 return false; 2164 } 2165 RegWidth++; 2166 return true; 2167 default: 2168 llvm_unreachable("unexpected register kind"); 2169 } 2170 } 2171 2172 struct RegInfo { 2173 StringLiteral Name; 2174 RegisterKind Kind; 2175 }; 2176 2177 static constexpr RegInfo RegularRegisters[] = { 2178 {{"v"}, IS_VGPR}, 2179 {{"s"}, IS_SGPR}, 2180 {{"ttmp"}, IS_TTMP}, 2181 {{"acc"}, IS_AGPR}, 2182 {{"a"}, IS_AGPR}, 2183 }; 2184 2185 static bool isRegularReg(RegisterKind Kind) { 2186 return Kind == IS_VGPR || 2187 Kind == IS_SGPR || 2188 Kind == IS_TTMP || 2189 Kind == IS_AGPR; 2190 } 2191 2192 static const RegInfo* getRegularRegInfo(StringRef Str) { 2193 for (const RegInfo &Reg : RegularRegisters) 2194 if (Str.startswith(Reg.Name)) 2195 return &Reg; 2196 return nullptr; 2197 } 2198 2199 static bool getRegNum(StringRef Str, unsigned& Num) { 2200 return !Str.getAsInteger(10, Num); 2201 } 2202 2203 bool 2204 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2205 const AsmToken &NextToken) const { 2206 2207 // A list of consecutive registers: [s0,s1,s2,s3] 2208 if (Token.is(AsmToken::LBrac)) 2209 return true; 2210 2211 if (!Token.is(AsmToken::Identifier)) 2212 return false; 2213 2214 // A single register like s0 or a range of registers like s[0:1] 2215 2216 StringRef Str = Token.getString(); 2217 const RegInfo *Reg = getRegularRegInfo(Str); 2218 if (Reg) { 2219 StringRef RegName = Reg->Name; 2220 StringRef RegSuffix = Str.substr(RegName.size()); 2221 if (!RegSuffix.empty()) { 2222 unsigned Num; 2223 // A single register with an index: rXX 2224 if (getRegNum(RegSuffix, Num)) 2225 return true; 2226 } else { 2227 // A range of registers: r[XX:YY]. 2228 if (NextToken.is(AsmToken::LBrac)) 2229 return true; 2230 } 2231 } 2232 2233 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2234 } 2235 2236 bool 2237 AMDGPUAsmParser::isRegister() 2238 { 2239 return isRegister(getToken(), peekToken()); 2240 } 2241 2242 unsigned 2243 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2244 unsigned RegNum, 2245 unsigned RegWidth, 2246 SMLoc Loc) { 2247 2248 assert(isRegularReg(RegKind)); 2249 2250 unsigned AlignSize = 1; 2251 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2252 // SGPR and TTMP registers must be aligned. 2253 // Max required alignment is 4 dwords. 
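// For example, s[2:3] is accepted (64-bit pair, index aligned to 2), while s[1:2] is rejected below with "invalid register alignment".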
2254 AlignSize = std::min(RegWidth, 4u); 2255 } 2256 2257 if (RegNum % AlignSize != 0) { 2258 Error(Loc, "invalid register alignment"); 2259 return AMDGPU::NoRegister; 2260 } 2261 2262 unsigned RegIdx = RegNum / AlignSize; 2263 int RCID = getRegClass(RegKind, RegWidth); 2264 if (RCID == -1) { 2265 Error(Loc, "invalid or unsupported register size"); 2266 return AMDGPU::NoRegister; 2267 } 2268 2269 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2270 const MCRegisterClass RC = TRI->getRegClass(RCID); 2271 if (RegIdx >= RC.getNumRegs()) { 2272 Error(Loc, "register index is out of range"); 2273 return AMDGPU::NoRegister; 2274 } 2275 2276 return RC.getRegister(RegIdx); 2277 } 2278 2279 bool 2280 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2281 int64_t RegLo, RegHi; 2282 if (!skipToken(AsmToken::LBrac, "missing register index")) 2283 return false; 2284 2285 SMLoc FirstIdxLoc = getLoc(); 2286 SMLoc SecondIdxLoc; 2287 2288 if (!parseExpr(RegLo)) 2289 return false; 2290 2291 if (trySkipToken(AsmToken::Colon)) { 2292 SecondIdxLoc = getLoc(); 2293 if (!parseExpr(RegHi)) 2294 return false; 2295 } else { 2296 RegHi = RegLo; 2297 } 2298 2299 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2300 return false; 2301 2302 if (!isUInt<32>(RegLo)) { 2303 Error(FirstIdxLoc, "invalid register index"); 2304 return false; 2305 } 2306 2307 if (!isUInt<32>(RegHi)) { 2308 Error(SecondIdxLoc, "invalid register index"); 2309 return false; 2310 } 2311 2312 if (RegLo > RegHi) { 2313 Error(FirstIdxLoc, "first register index should not exceed second index"); 2314 return false; 2315 } 2316 2317 Num = static_cast<unsigned>(RegLo); 2318 Width = (RegHi - RegLo) + 1; 2319 return true; 2320 } 2321 2322 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2323 unsigned &RegNum, unsigned &RegWidth, 2324 SmallVectorImpl<AsmToken> &Tokens) { 2325 assert(isToken(AsmToken::Identifier)); 2326 unsigned Reg = getSpecialRegForName(getTokenStr()); 2327 if (Reg) { 2328 RegNum = 0; 2329 RegWidth = 1; 2330 RegKind = IS_SPECIAL; 2331 Tokens.push_back(getToken()); 2332 lex(); // skip register name 2333 } 2334 return Reg; 2335 } 2336 2337 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2338 unsigned &RegNum, unsigned &RegWidth, 2339 SmallVectorImpl<AsmToken> &Tokens) { 2340 assert(isToken(AsmToken::Identifier)); 2341 StringRef RegName = getTokenStr(); 2342 auto Loc = getLoc(); 2343 2344 const RegInfo *RI = getRegularRegInfo(RegName); 2345 if (!RI) { 2346 Error(Loc, "invalid register name"); 2347 return AMDGPU::NoRegister; 2348 } 2349 2350 Tokens.push_back(getToken()); 2351 lex(); // skip register name 2352 2353 RegKind = RI->Kind; 2354 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2355 if (!RegSuffix.empty()) { 2356 // Single 32-bit register: vXX. 2357 if (!getRegNum(RegSuffix, RegNum)) { 2358 Error(Loc, "invalid register index"); 2359 return AMDGPU::NoRegister; 2360 } 2361 RegWidth = 1; 2362 } else { 2363 // Range of registers: v[XX:YY]. ":YY" is optional. 
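// E.g. "v[4:7]" yields RegNum = 4 and RegWidth = 4, while "v[4]" yields RegNum = 4 and RegWidth = 1.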
2364 if (!ParseRegRange(RegNum, RegWidth)) 2365 return AMDGPU::NoRegister; 2366 } 2367 2368 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2369 } 2370 2371 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2372 unsigned &RegWidth, 2373 SmallVectorImpl<AsmToken> &Tokens) { 2374 unsigned Reg = AMDGPU::NoRegister; 2375 auto ListLoc = getLoc(); 2376 2377 if (!skipToken(AsmToken::LBrac, 2378 "expected a register or a list of registers")) { 2379 return AMDGPU::NoRegister; 2380 } 2381 2382 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2383 2384 auto Loc = getLoc(); 2385 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2386 return AMDGPU::NoRegister; 2387 if (RegWidth != 1) { 2388 Error(Loc, "expected a single 32-bit register"); 2389 return AMDGPU::NoRegister; 2390 } 2391 2392 for (; trySkipToken(AsmToken::Comma); ) { 2393 RegisterKind NextRegKind; 2394 unsigned NextReg, NextRegNum, NextRegWidth; 2395 Loc = getLoc(); 2396 2397 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2398 NextRegNum, NextRegWidth, 2399 Tokens)) { 2400 return AMDGPU::NoRegister; 2401 } 2402 if (NextRegWidth != 1) { 2403 Error(Loc, "expected a single 32-bit register"); 2404 return AMDGPU::NoRegister; 2405 } 2406 if (NextRegKind != RegKind) { 2407 Error(Loc, "registers in a list must be of the same kind"); 2408 return AMDGPU::NoRegister; 2409 } 2410 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2411 return AMDGPU::NoRegister; 2412 } 2413 2414 if (!skipToken(AsmToken::RBrac, 2415 "expected a comma or a closing square bracket")) { 2416 return AMDGPU::NoRegister; 2417 } 2418 2419 if (isRegularReg(RegKind)) 2420 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2421 2422 return Reg; 2423 } 2424 2425 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2426 unsigned &RegNum, unsigned &RegWidth, 2427 SmallVectorImpl<AsmToken> &Tokens) { 2428 auto Loc = getLoc(); 2429 Reg = AMDGPU::NoRegister; 2430 2431 if (isToken(AsmToken::Identifier)) { 2432 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2433 if (Reg == AMDGPU::NoRegister) 2434 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2435 } else { 2436 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2437 } 2438 2439 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2440 if (Reg == AMDGPU::NoRegister) { 2441 assert(Parser.hasPendingError()); 2442 return false; 2443 } 2444 2445 if (!subtargetHasRegister(*TRI, Reg)) { 2446 if (Reg == AMDGPU::SGPR_NULL) { 2447 Error(Loc, "'null' operand is not supported on this GPU"); 2448 } else { 2449 Error(Loc, "register not available on this GPU"); 2450 } 2451 return false; 2452 } 2453 2454 return true; 2455 } 2456 2457 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2458 unsigned &RegNum, unsigned &RegWidth, 2459 bool RestoreOnFailure /*=false*/) { 2460 Reg = AMDGPU::NoRegister; 2461 2462 SmallVector<AsmToken, 1> Tokens; 2463 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2464 if (RestoreOnFailure) { 2465 while (!Tokens.empty()) { 2466 getLexer().UnLex(Tokens.pop_back_val()); 2467 } 2468 } 2469 return true; 2470 } 2471 return false; 2472 } 2473 2474 Optional<StringRef> 2475 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2476 switch (RegKind) { 2477 case IS_VGPR: 2478 return StringRef(".amdgcn.next_free_vgpr"); 2479 case IS_SGPR: 2480 return StringRef(".amdgcn.next_free_sgpr"); 2481 default: 2482 return None; 2483 } 2484 } 2485 2486 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2487 auto SymbolName = getGprCountSymbolName(RegKind); 2488 assert(SymbolName && "initializing invalid register kind"); 2489 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2490 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2491 } 2492 2493 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2494 unsigned DwordRegIndex, 2495 unsigned RegWidth) { 2496 // Symbols are only defined for GCN targets 2497 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2498 return true; 2499 2500 auto SymbolName = getGprCountSymbolName(RegKind); 2501 if (!SymbolName) 2502 return true; 2503 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2504 2505 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2506 int64_t OldCount; 2507 2508 if (!Sym->isVariable()) 2509 return !Error(getLoc(), 2510 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2511 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2512 return !Error( 2513 getLoc(), 2514 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2515 2516 if (OldCount <= NewMax) 2517 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2518 2519 return true; 2520 } 2521 2522 std::unique_ptr<AMDGPUOperand> 2523 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2524 const auto &Tok = getToken(); 2525 SMLoc StartLoc = Tok.getLoc(); 2526 SMLoc EndLoc = Tok.getEndLoc(); 2527 RegisterKind RegKind; 2528 unsigned Reg, RegNum, RegWidth; 2529 2530 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2531 return nullptr; 2532 } 2533 if (isHsaAbiVersion3(&getSTI())) { 2534 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2535 return nullptr; 2536 } else 2537 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2538 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2539 } 2540 2541 OperandMatchResultTy 2542 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2543 // TODO: add syntactic sugar for 1/(2*PI) 2544 2545 assert(!isRegister()); 2546 assert(!isModifier()); 2547 2548 const auto& Tok = getToken(); 2549 const auto& NextTok = peekToken(); 2550 bool IsReal = Tok.is(AsmToken::Real); 2551 SMLoc S = getLoc(); 2552 bool Negate = false; 2553 2554 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2555 lex(); 2556 IsReal = true; 2557 Negate = true; 2558 } 2559 2560 if (IsReal) { 2561 // Floating-point expressions are not supported. 2562 // Can only allow floating-point literals with an 2563 // optional sign. 2564 2565 StringRef Num = getTokenStr(); 2566 lex(); 2567 2568 APFloat RealVal(APFloat::IEEEdouble()); 2569 auto roundMode = APFloat::rmNearestTiesToEven; 2570 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2571 return MatchOperand_ParseFail; 2572 } 2573 if (Negate) 2574 RealVal.changeSign(); 2575 2576 Operands.push_back( 2577 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2578 AMDGPUOperand::ImmTyNone, true)); 2579 2580 return MatchOperand_Success; 2581 2582 } else { 2583 int64_t IntVal; 2584 const MCExpr *Expr; 2585 SMLoc S = getLoc(); 2586 2587 if (HasSP3AbsModifier) { 2588 // This is a workaround for handling expressions 2589 // as arguments of SP3 'abs' modifier, for example: 2590 // |1.0| 2591 // |-1| 2592 // |1+x| 2593 // This syntax is not compatible with syntax of standard 2594 // MC expressions (due to the trailing '|'). 
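// A rough sketch of the intent: parsePrimaryExpr stops after a single primary expression, so for "|x|" only "x" is consumed here and the trailing '|' is left for the caller to skip.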
2595 SMLoc EndLoc; 2596 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2597 return MatchOperand_ParseFail; 2598 } else { 2599 if (Parser.parseExpression(Expr)) 2600 return MatchOperand_ParseFail; 2601 } 2602 2603 if (Expr->evaluateAsAbsolute(IntVal)) { 2604 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2605 } else { 2606 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2607 } 2608 2609 return MatchOperand_Success; 2610 } 2611 2612 return MatchOperand_NoMatch; 2613 } 2614 2615 OperandMatchResultTy 2616 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2617 if (!isRegister()) 2618 return MatchOperand_NoMatch; 2619 2620 if (auto R = parseRegister()) { 2621 assert(R->isReg()); 2622 Operands.push_back(std::move(R)); 2623 return MatchOperand_Success; 2624 } 2625 return MatchOperand_ParseFail; 2626 } 2627 2628 OperandMatchResultTy 2629 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2630 auto res = parseReg(Operands); 2631 if (res != MatchOperand_NoMatch) { 2632 return res; 2633 } else if (isModifier()) { 2634 return MatchOperand_NoMatch; 2635 } else { 2636 return parseImm(Operands, HasSP3AbsMod); 2637 } 2638 } 2639 2640 bool 2641 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2642 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2643 const auto &str = Token.getString(); 2644 return str == "abs" || str == "neg" || str == "sext"; 2645 } 2646 return false; 2647 } 2648 2649 bool 2650 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2651 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2652 } 2653 2654 bool 2655 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2656 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2657 } 2658 2659 bool 2660 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2661 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2662 } 2663 2664 // Check if this is an operand modifier or an opcode modifier 2665 // which may look like an expression but it is not. We should 2666 // avoid parsing these modifiers as expressions. Currently 2667 // recognized sequences are: 2668 // |...| 2669 // abs(...) 2670 // neg(...) 2671 // sext(...) 2672 // -reg 2673 // -|...| 2674 // -abs(...) 2675 // name:... 2676 // Note that simple opcode modifiers like 'gds' may be parsed as 2677 // expressions; this is a special case. See getExpressionAsToken. 2678 // 2679 bool 2680 AMDGPUAsmParser::isModifier() { 2681 2682 AsmToken Tok = getToken(); 2683 AsmToken NextToken[2]; 2684 peekTokens(NextToken); 2685 2686 return isOperandModifier(Tok, NextToken[0]) || 2687 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2688 isOpcodeModifierWithVal(Tok, NextToken[0]); 2689 } 2690 2691 // Check if the current token is an SP3 'neg' modifier. 2692 // Currently this modifier is allowed in the following context: 2693 // 2694 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2695 // 2. Before an 'abs' modifier: -abs(...) 2696 // 3. Before an SP3 'abs' modifier: -|...| 2697 // 2698 // In all other cases "-" is handled as a part 2699 // of an expression that follows the sign. 
2700 // 2701 // Note: When "-" is followed by an integer literal, 2702 // this is interpreted as integer negation rather 2703 // than a floating-point NEG modifier applied to the literal. 2704 // Besides being counter-intuitive, such use of a floating-point 2705 // NEG modifier would have resulted in a different meaning 2706 // of integer literals used with VOP1/2/C and VOP3, 2707 // for example: 2708 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2709 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2710 // Negative fp literals with preceding "-" are 2711 // handled likewise for uniformity 2712 // 2713 bool 2714 AMDGPUAsmParser::parseSP3NegModifier() { 2715 2716 AsmToken NextToken[2]; 2717 peekTokens(NextToken); 2718 2719 if (isToken(AsmToken::Minus) && 2720 (isRegister(NextToken[0], NextToken[1]) || 2721 NextToken[0].is(AsmToken::Pipe) || 2722 isId(NextToken[0], "abs"))) { 2723 lex(); 2724 return true; 2725 } 2726 2727 return false; 2728 } 2729 2730 OperandMatchResultTy 2731 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2732 bool AllowImm) { 2733 bool Neg, SP3Neg; 2734 bool Abs, SP3Abs; 2735 SMLoc Loc; 2736 2737 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2738 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2739 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2740 return MatchOperand_ParseFail; 2741 } 2742 2743 SP3Neg = parseSP3NegModifier(); 2744 2745 Loc = getLoc(); 2746 Neg = trySkipId("neg"); 2747 if (Neg && SP3Neg) { 2748 Error(Loc, "expected register or immediate"); 2749 return MatchOperand_ParseFail; 2750 } 2751 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2752 return MatchOperand_ParseFail; 2753 2754 Abs = trySkipId("abs"); 2755 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2756 return MatchOperand_ParseFail; 2757 2758 Loc = getLoc(); 2759 SP3Abs = trySkipToken(AsmToken::Pipe); 2760 if (Abs && SP3Abs) { 2761 Error(Loc, "expected register or immediate"); 2762 return MatchOperand_ParseFail; 2763 } 2764 2765 OperandMatchResultTy Res; 2766 if (AllowImm) { 2767 Res = parseRegOrImm(Operands, SP3Abs); 2768 } else { 2769 Res = parseReg(Operands); 2770 } 2771 if (Res != MatchOperand_Success) { 2772 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2773 } 2774 2775 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2776 return MatchOperand_ParseFail; 2777 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2778 return MatchOperand_ParseFail; 2779 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2780 return MatchOperand_ParseFail; 2781 2782 AMDGPUOperand::Modifiers Mods; 2783 Mods.Abs = Abs || SP3Abs; 2784 Mods.Neg = Neg || SP3Neg; 2785 2786 if (Mods.hasFPModifiers()) { 2787 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2788 if (Op.isExpr()) { 2789 Error(Op.getStartLoc(), "expected an absolute expression"); 2790 return MatchOperand_ParseFail; 2791 } 2792 Op.setModifiers(Mods); 2793 } 2794 return MatchOperand_Success; 2795 } 2796 2797 OperandMatchResultTy 2798 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2799 bool AllowImm) { 2800 bool Sext = trySkipId("sext"); 2801 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2802 return MatchOperand_ParseFail; 2803 2804 OperandMatchResultTy Res; 2805 if (AllowImm) { 2806 Res = parseRegOrImm(Operands); 2807 } else { 2808 Res = parseReg(Operands); 2809 } 2810 if (Res != MatchOperand_Success) { 2811 return Sext? MatchOperand_ParseFail : Res; 2812 } 2813 2814 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2815 return MatchOperand_ParseFail; 2816 2817 AMDGPUOperand::Modifiers Mods; 2818 Mods.Sext = Sext; 2819 2820 if (Mods.hasIntModifiers()) { 2821 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2822 if (Op.isExpr()) { 2823 Error(Op.getStartLoc(), "expected an absolute expression"); 2824 return MatchOperand_ParseFail; 2825 } 2826 Op.setModifiers(Mods); 2827 } 2828 2829 return MatchOperand_Success; 2830 } 2831 2832 OperandMatchResultTy 2833 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2834 return parseRegOrImmWithFPInputMods(Operands, false); 2835 } 2836 2837 OperandMatchResultTy 2838 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2839 return parseRegOrImmWithIntInputMods(Operands, false); 2840 } 2841 2842 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2843 auto Loc = getLoc(); 2844 if (trySkipId("off")) { 2845 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2846 AMDGPUOperand::ImmTyOff, false)); 2847 return MatchOperand_Success; 2848 } 2849 2850 if (!isRegister()) 2851 return MatchOperand_NoMatch; 2852 2853 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2854 if (Reg) { 2855 Operands.push_back(std::move(Reg)); 2856 return MatchOperand_Success; 2857 } 2858 2859 return MatchOperand_ParseFail; 2860 2861 } 2862 2863 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2864 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2865 2866 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2867 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2868 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2869 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2870 return Match_InvalidOperand; 2871 2872 if ((TSFlags & SIInstrFlags::VOP3) && 2873 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2874 getForcedEncodingSize() != 64) 2875 return Match_PreferE32; 2876 2877 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2878 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2879 // v_mac_f32/16 allow only dst_sel == DWORD; 2880 auto OpNum = 2881 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2882 const auto &Op = Inst.getOperand(OpNum); 2883 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2884 return Match_InvalidOperand; 2885 } 2886 } 2887 2888 return Match_Success; 2889 } 2890 2891 static ArrayRef<unsigned> getAllVariants() { 2892 static const unsigned Variants[] = { 2893 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2894 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2895 }; 2896 2897 return makeArrayRef(Variants); 2898 } 2899 2900 // What asm variants we should check 2901 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2902 if (getForcedEncodingSize() == 32) { 2903 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2904 return makeArrayRef(Variants); 2905 } 2906 2907 if (isForcedVOP3()) { 2908 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2909 return makeArrayRef(Variants); 2910 } 2911 2912 if (isForcedSDWA()) { 2913 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2914 AMDGPUAsmVariants::SDWA9}; 2915 return makeArrayRef(Variants); 2916 } 2917 2918 if (isForcedDPP()) { 2919 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2920 return makeArrayRef(Variants); 2921 } 2922 2923 return getAllVariants(); 2924 } 2925 2926 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2927 if (getForcedEncodingSize() == 32) 2928 return "e32"; 2929 2930 if (isForcedVOP3()) 2931 return "e64"; 2932 2933 if (isForcedSDWA()) 2934 return "sdwa"; 2935 2936 if (isForcedDPP()) 2937 return "dpp"; 2938 2939 return ""; 2940 } 2941 2942 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2943 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2944 const unsigned Num = Desc.getNumImplicitUses(); 2945 for (unsigned i = 0; i < Num; ++i) { 2946 unsigned Reg = Desc.ImplicitUses[i]; 2947 switch (Reg) { 2948 case AMDGPU::FLAT_SCR: 2949 case AMDGPU::VCC: 2950 case AMDGPU::VCC_LO: 2951 case AMDGPU::VCC_HI: 2952 case AMDGPU::M0: 2953 return Reg; 2954 default: 2955 break; 2956 } 2957 } 2958 return AMDGPU::NoRegister; 2959 } 2960 2961 // NB: This code is correct only when used to check constant 2962 // bus limitations because GFX7 support no f16 inline constants. 2963 // Note that there are no cases when a GFX7 opcode violates 2964 // constant bus limitations due to the use of an f16 constant. 
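// For reference (informal summary): the checks below accept the usual inline constants (integers -16..64, 0.0, +-0.5, +-1.0, +-2.0, +-4.0) and, on subtargets reporting hasInv2PiInlineImm(), the 1/(2*pi) encoding as well.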
2965 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2966 unsigned OpIdx) const { 2967 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2968 2969 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2970 return false; 2971 } 2972 2973 const MCOperand &MO = Inst.getOperand(OpIdx); 2974 2975 int64_t Val = MO.getImm(); 2976 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2977 2978 switch (OpSize) { // expected operand size 2979 case 8: 2980 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2981 case 4: 2982 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2983 case 2: { 2984 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2985 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2986 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2987 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2988 return AMDGPU::isInlinableIntLiteral(Val); 2989 2990 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2991 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2992 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2993 return AMDGPU::isInlinableIntLiteralV216(Val); 2994 2995 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2996 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2997 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2998 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2999 3000 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3001 } 3002 default: 3003 llvm_unreachable("invalid operand size"); 3004 } 3005 } 3006 3007 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3008 if (!isGFX10Plus()) 3009 return 1; 3010 3011 switch (Opcode) { 3012 // 64-bit shift instructions can use only one scalar value input 3013 case AMDGPU::V_LSHLREV_B64_e64: 3014 case AMDGPU::V_LSHLREV_B64_gfx10: 3015 case AMDGPU::V_LSHRREV_B64_e64: 3016 case AMDGPU::V_LSHRREV_B64_gfx10: 3017 case AMDGPU::V_ASHRREV_I64_e64: 3018 case AMDGPU::V_ASHRREV_I64_gfx10: 3019 case AMDGPU::V_LSHL_B64_e64: 3020 case AMDGPU::V_LSHR_B64_e64: 3021 case AMDGPU::V_ASHR_I64_e64: 3022 return 1; 3023 default: 3024 return 2; 3025 } 3026 } 3027 3028 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3029 const MCOperand &MO = Inst.getOperand(OpIdx); 3030 if (MO.isImm()) { 3031 return !isInlineConstant(Inst, OpIdx); 3032 } else if (MO.isReg()) { 3033 auto Reg = MO.getReg(); 3034 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3035 auto PReg = mc2PseudoReg(Reg); 3036 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3037 } else { 3038 return true; 3039 } 3040 } 3041 3042 bool 3043 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3044 const OperandVector &Operands) { 3045 const unsigned Opcode = Inst.getOpcode(); 3046 const MCInstrDesc &Desc = MII.get(Opcode); 3047 unsigned LastSGPR = AMDGPU::NoRegister; 3048 unsigned ConstantBusUseCount = 0; 3049 unsigned NumLiterals = 0; 3050 unsigned LiteralSize; 3051 3052 if (Desc.TSFlags & 3053 (SIInstrFlags::VOPC | 3054 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3055 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3056 SIInstrFlags::SDWA)) { 3057 // Check special imm operands (used by madmk, etc) 3058 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3059 ++ConstantBusUseCount; 3060 } 3061 3062 SmallDenseSet<unsigned> SGPRsUsed; 3063 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3064 if (SGPRUsed != AMDGPU::NoRegister) { 3065 SGPRsUsed.insert(SGPRUsed); 3066 ++ConstantBusUseCount; 3067 } 3068 3069 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3070 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3071 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3072 3073 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3074 3075 for (int OpIdx : OpIndices) { 3076 if (OpIdx == -1) break; 3077 3078 const MCOperand &MO = Inst.getOperand(OpIdx); 3079 if (usesConstantBus(Inst, OpIdx)) { 3080 if (MO.isReg()) { 3081 LastSGPR = mc2PseudoReg(MO.getReg()); 3082 // Pairs of registers with a partial intersections like these 3083 // s0, s[0:1] 3084 // flat_scratch_lo, flat_scratch 3085 // flat_scratch_lo, flat_scratch_hi 3086 // are theoretically valid but they are disabled anyway. 3087 // Note that this code mimics SIInstrInfo::verifyInstruction 3088 if (!SGPRsUsed.count(LastSGPR)) { 3089 SGPRsUsed.insert(LastSGPR); 3090 ++ConstantBusUseCount; 3091 } 3092 } else { // Expression or a literal 3093 3094 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3095 continue; // special operand like VINTERP attr_chan 3096 3097 // An instruction may use only one literal. 3098 // This has been validated on the previous step. 3099 // See validateVOP3Literal. 3100 // This literal may be used as more than one operand. 3101 // If all these operands are of the same size, 3102 // this literal counts as one scalar value. 3103 // Otherwise it counts as 2 scalar values. 3104 // See "GFX10 Shader Programming", section 3.6.2.3. 3105 3106 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3107 if (Size < 4) Size = 4; 3108 3109 if (NumLiterals == 0) { 3110 NumLiterals = 1; 3111 LiteralSize = Size; 3112 } else if (LiteralSize != Size) { 3113 NumLiterals = 2; 3114 } 3115 } 3116 } 3117 } 3118 } 3119 ConstantBusUseCount += NumLiterals; 3120 3121 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3122 return true; 3123 3124 SMLoc LitLoc = getLitLoc(Operands); 3125 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3126 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3127 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3128 return false; 3129 } 3130 3131 bool 3132 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3133 const OperandVector &Operands) { 3134 const unsigned Opcode = Inst.getOpcode(); 3135 const MCInstrDesc &Desc = MII.get(Opcode); 3136 3137 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3138 if (DstIdx == -1 || 3139 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3140 return true; 3141 } 3142 3143 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3144 3145 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3146 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3147 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3148 3149 assert(DstIdx != -1); 3150 const MCOperand &Dst = Inst.getOperand(DstIdx); 3151 assert(Dst.isReg()); 3152 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3153 3154 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3155 3156 for (int SrcIdx : SrcIndices) { 3157 if (SrcIdx == -1) break; 3158 const MCOperand &Src = Inst.getOperand(SrcIdx); 3159 if (Src.isReg()) { 3160 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3161 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3162 Error(getRegLoc(SrcReg, Operands), 3163 "destination must be different than all sources"); 3164 return false; 3165 } 3166 } 3167 } 3168 3169 return true; 3170 } 3171 3172 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3173 3174 const unsigned Opc = Inst.getOpcode(); 3175 const MCInstrDesc &Desc = MII.get(Opc); 3176 3177 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3178 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3179 assert(ClampIdx != -1); 3180 return Inst.getOperand(ClampIdx).getImm() == 0; 3181 } 3182 3183 return true; 3184 } 3185 3186 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3187 3188 const unsigned Opc = Inst.getOpcode(); 3189 const MCInstrDesc &Desc = MII.get(Opc); 3190 3191 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3192 return true; 3193 3194 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3195 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3196 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3197 3198 assert(VDataIdx != -1); 3199 3200 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3201 return true; 3202 3203 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3204 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3205 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3206 if (DMask == 0) 3207 DMask = 1; 3208 3209 unsigned DataSize = 3210 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3211 if (hasPackedD16()) { 3212 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3213 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3214 DataSize = (DataSize + 1) / 2; 3215 } 3216 3217 return (VDataSize / 4) == DataSize + TFESize; 3218 } 3219 3220 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3221 const unsigned Opc = Inst.getOpcode(); 3222 const MCInstrDesc &Desc = MII.get(Opc); 3223 3224 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3225 return true; 3226 3227 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3228 3229 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3230 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3231 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3232 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3233 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3234 3235 assert(VAddr0Idx != -1); 3236 assert(SrsrcIdx != -1); 3237 assert(SrsrcIdx > VAddr0Idx); 3238 3239 if (DimIdx == -1) 3240 return true; // intersect_ray 3241 3242 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3243 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3244 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3245 unsigned VAddrSize = 3246 IsNSA ? SrsrcIdx - VAddr0Idx 3247 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3248 3249 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3250 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3251 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3252 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3253 if (!IsNSA) { 3254 if (AddrSize > 8) 3255 AddrSize = 16; 3256 else if (AddrSize > 4) 3257 AddrSize = 8; 3258 } 3259 3260 return VAddrSize == AddrSize; 3261 } 3262 3263 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3264 3265 const unsigned Opc = Inst.getOpcode(); 3266 const MCInstrDesc &Desc = MII.get(Opc); 3267 3268 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3269 return true; 3270 if (!Desc.mayLoad() || !Desc.mayStore()) 3271 return true; // Not atomic 3272 3273 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3274 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3275 3276 // This is an incomplete check because image_atomic_cmpswap 3277 // may only use 0x3 and 0xf while other atomic operations 3278 // may use 0x1 and 0x3. However these limitations are 3279 // verified when we check that dmask matches dst size. 3280 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3281 } 3282 3283 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3284 3285 const unsigned Opc = Inst.getOpcode(); 3286 const MCInstrDesc &Desc = MII.get(Opc); 3287 3288 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3289 return true; 3290 3291 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3292 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3293 3294 // GATHER4 instructions use dmask in a different fashion compared to 3295 // other MIMG instructions. The only useful DMASK values are 3296 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3297 // (red,red,red,red) etc.) The ISA document doesn't mention 3298 // this. 
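// E.g. "image_gather4 ... dmask:0x2" gathers the green channel of each texel; a combined mask such as dmask:0x3 is rejected here.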
3299 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3300 } 3301 3302 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3303 { 3304 switch (Opcode) { 3305 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3306 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3307 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3308 return true; 3309 default: 3310 return false; 3311 } 3312 } 3313 3314 // movrels* opcodes should only allow VGPRS as src0. 3315 // This is specified in .td description for vop1/vop3, 3316 // but sdwa is handled differently. See isSDWAOperand. 3317 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3318 const OperandVector &Operands) { 3319 3320 const unsigned Opc = Inst.getOpcode(); 3321 const MCInstrDesc &Desc = MII.get(Opc); 3322 3323 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3324 return true; 3325 3326 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3327 assert(Src0Idx != -1); 3328 3329 SMLoc ErrLoc; 3330 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3331 if (Src0.isReg()) { 3332 auto Reg = mc2PseudoReg(Src0.getReg()); 3333 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3334 if (!isSGPR(Reg, TRI)) 3335 return true; 3336 ErrLoc = getRegLoc(Reg, Operands); 3337 } else { 3338 ErrLoc = getConstLoc(Operands); 3339 } 3340 3341 Error(ErrLoc, "source operand must be a VGPR"); 3342 return false; 3343 } 3344 3345 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3346 const OperandVector &Operands) { 3347 3348 const unsigned Opc = Inst.getOpcode(); 3349 3350 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3351 return true; 3352 3353 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3354 assert(Src0Idx != -1); 3355 3356 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3357 if (!Src0.isReg()) 3358 return true; 3359 3360 auto Reg = mc2PseudoReg(Src0.getReg()); 3361 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3362 if (isSGPR(Reg, TRI)) { 3363 Error(getRegLoc(Reg, Operands), 3364 "source operand must be either a VGPR or an inline constant"); 3365 return false; 3366 } 3367 3368 return true; 3369 } 3370 3371 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3372 switch (Inst.getOpcode()) { 3373 default: 3374 return true; 3375 case V_DIV_SCALE_F32_gfx6_gfx7: 3376 case V_DIV_SCALE_F32_vi: 3377 case V_DIV_SCALE_F32_gfx10: 3378 case V_DIV_SCALE_F64_gfx6_gfx7: 3379 case V_DIV_SCALE_F64_vi: 3380 case V_DIV_SCALE_F64_gfx10: 3381 break; 3382 } 3383 3384 // TODO: Check that src0 = src1 or src2. 
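// The ABS (|...|) source modifier is not accepted on v_div_scale: reject the instruction if any source modifiers operand has SISrcMods::ABS set.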
3385 3386 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3387 AMDGPU::OpName::src1_modifiers, 3388 AMDGPU::OpName::src2_modifiers}) { 3389 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3390 .getImm() & 3391 SISrcMods::ABS) { 3392 return false; 3393 } 3394 } 3395 3396 return true; 3397 } 3398 3399 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3400 3401 const unsigned Opc = Inst.getOpcode(); 3402 const MCInstrDesc &Desc = MII.get(Opc); 3403 3404 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3405 return true; 3406 3407 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3408 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3409 if (isCI() || isSI()) 3410 return false; 3411 } 3412 3413 return true; 3414 } 3415 3416 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3417 const unsigned Opc = Inst.getOpcode(); 3418 const MCInstrDesc &Desc = MII.get(Opc); 3419 3420 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3421 return true; 3422 3423 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3424 if (DimIdx < 0) 3425 return true; 3426 3427 long Imm = Inst.getOperand(DimIdx).getImm(); 3428 if (Imm < 0 || Imm >= 8) 3429 return false; 3430 3431 return true; 3432 } 3433 3434 static bool IsRevOpcode(const unsigned Opcode) 3435 { 3436 switch (Opcode) { 3437 case AMDGPU::V_SUBREV_F32_e32: 3438 case AMDGPU::V_SUBREV_F32_e64: 3439 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3440 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3441 case AMDGPU::V_SUBREV_F32_e32_vi: 3442 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3443 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3444 case AMDGPU::V_SUBREV_F32_e64_vi: 3445 3446 case AMDGPU::V_SUBREV_CO_U32_e32: 3447 case AMDGPU::V_SUBREV_CO_U32_e64: 3448 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3449 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3450 3451 case AMDGPU::V_SUBBREV_U32_e32: 3452 case AMDGPU::V_SUBBREV_U32_e64: 3453 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3454 case AMDGPU::V_SUBBREV_U32_e32_vi: 3455 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3456 case AMDGPU::V_SUBBREV_U32_e64_vi: 3457 3458 case AMDGPU::V_SUBREV_U32_e32: 3459 case AMDGPU::V_SUBREV_U32_e64: 3460 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3461 case AMDGPU::V_SUBREV_U32_e32_vi: 3462 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3463 case AMDGPU::V_SUBREV_U32_e64_vi: 3464 3465 case AMDGPU::V_SUBREV_F16_e32: 3466 case AMDGPU::V_SUBREV_F16_e64: 3467 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3468 case AMDGPU::V_SUBREV_F16_e32_vi: 3469 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3470 case AMDGPU::V_SUBREV_F16_e64_vi: 3471 3472 case AMDGPU::V_SUBREV_U16_e32: 3473 case AMDGPU::V_SUBREV_U16_e64: 3474 case AMDGPU::V_SUBREV_U16_e32_vi: 3475 case AMDGPU::V_SUBREV_U16_e64_vi: 3476 3477 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3478 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3479 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3480 3481 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3482 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3483 3484 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3485 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3486 3487 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3488 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3489 3490 case AMDGPU::V_LSHRREV_B32_e32: 3491 case AMDGPU::V_LSHRREV_B32_e64: 3492 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3493 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3494 case AMDGPU::V_LSHRREV_B32_e32_vi: 3495 case AMDGPU::V_LSHRREV_B32_e64_vi: 3496 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3497 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3498 3499 case AMDGPU::V_ASHRREV_I32_e32: 3500 case
AMDGPU::V_ASHRREV_I32_e64: 3501 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3502 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3503 case AMDGPU::V_ASHRREV_I32_e32_vi: 3504 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3505 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3506 case AMDGPU::V_ASHRREV_I32_e64_vi: 3507 3508 case AMDGPU::V_LSHLREV_B32_e32: 3509 case AMDGPU::V_LSHLREV_B32_e64: 3510 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3511 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3512 case AMDGPU::V_LSHLREV_B32_e32_vi: 3513 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3514 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3515 case AMDGPU::V_LSHLREV_B32_e64_vi: 3516 3517 case AMDGPU::V_LSHLREV_B16_e32: 3518 case AMDGPU::V_LSHLREV_B16_e64: 3519 case AMDGPU::V_LSHLREV_B16_e32_vi: 3520 case AMDGPU::V_LSHLREV_B16_e64_vi: 3521 case AMDGPU::V_LSHLREV_B16_gfx10: 3522 3523 case AMDGPU::V_LSHRREV_B16_e32: 3524 case AMDGPU::V_LSHRREV_B16_e64: 3525 case AMDGPU::V_LSHRREV_B16_e32_vi: 3526 case AMDGPU::V_LSHRREV_B16_e64_vi: 3527 case AMDGPU::V_LSHRREV_B16_gfx10: 3528 3529 case AMDGPU::V_ASHRREV_I16_e32: 3530 case AMDGPU::V_ASHRREV_I16_e64: 3531 case AMDGPU::V_ASHRREV_I16_e32_vi: 3532 case AMDGPU::V_ASHRREV_I16_e64_vi: 3533 case AMDGPU::V_ASHRREV_I16_gfx10: 3534 3535 case AMDGPU::V_LSHLREV_B64_e64: 3536 case AMDGPU::V_LSHLREV_B64_gfx10: 3537 case AMDGPU::V_LSHLREV_B64_vi: 3538 3539 case AMDGPU::V_LSHRREV_B64_e64: 3540 case AMDGPU::V_LSHRREV_B64_gfx10: 3541 case AMDGPU::V_LSHRREV_B64_vi: 3542 3543 case AMDGPU::V_ASHRREV_I64_e64: 3544 case AMDGPU::V_ASHRREV_I64_gfx10: 3545 case AMDGPU::V_ASHRREV_I64_vi: 3546 3547 case AMDGPU::V_PK_LSHLREV_B16: 3548 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3549 case AMDGPU::V_PK_LSHLREV_B16_vi: 3550 3551 case AMDGPU::V_PK_LSHRREV_B16: 3552 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3553 case AMDGPU::V_PK_LSHRREV_B16_vi: 3554 case AMDGPU::V_PK_ASHRREV_I16: 3555 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3556 case AMDGPU::V_PK_ASHRREV_I16_vi: 3557 return true; 3558 default: 3559 return false; 3560 } 3561 } 3562 3563 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3564 3565 using namespace SIInstrFlags; 3566 const unsigned Opcode = Inst.getOpcode(); 3567 const MCInstrDesc &Desc = MII.get(Opcode); 3568 3569 // lds_direct register is defined so that it can be used 3570 // with 9-bit operands only. Ignore encodings which do not accept these. 3571 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3572 return true; 3573 3574 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3575 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3576 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3577 3578 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3579 3580 // lds_direct cannot be specified as either src1 or src2. 3581 for (int SrcIdx : SrcIndices) { 3582 if (SrcIdx == -1) break; 3583 const MCOperand &Src = Inst.getOperand(SrcIdx); 3584 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3585 return false; 3586 } 3587 } 3588 3589 if (Src0Idx == -1) 3590 return true; 3591 3592 const MCOperand &Src = Inst.getOperand(Src0Idx); 3593 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3594 return true; 3595 3596 // lds_direct is specified as src0. Check additional limitations. 
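// Illustrative note (not from the original source): at this point lds_direct
// as src0 is still rejected for SDWA encodings and for *rev opcodes, so e.g.
// "v_mov_b32 v0, lds_direct" passes this check while an SDWA or v_subrev
// form using lds_direct would not.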
3597 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3598 } 3599 3600 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3601 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3602 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3603 if (Op.isFlatOffset()) 3604 return Op.getStartLoc(); 3605 } 3606 return getLoc(); 3607 } 3608 3609 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3610 const OperandVector &Operands) { 3611 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3612 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3613 return true; 3614 3615 auto Opcode = Inst.getOpcode(); 3616 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3617 assert(OpNum != -1); 3618 3619 const auto &Op = Inst.getOperand(OpNum); 3620 if (!hasFlatOffsets() && Op.getImm() != 0) { 3621 Error(getFlatOffsetLoc(Operands), 3622 "flat offset modifier is not supported on this GPU"); 3623 return false; 3624 } 3625 3626 // For FLAT segment the offset must be positive; 3627 // MSB is ignored and forced to zero. 3628 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3629 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3630 if (!isIntN(OffsetSize, Op.getImm())) { 3631 Error(getFlatOffsetLoc(Operands), 3632 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3633 return false; 3634 } 3635 } else { 3636 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3637 if (!isUIntN(OffsetSize, Op.getImm())) { 3638 Error(getFlatOffsetLoc(Operands), 3639 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3640 return false; 3641 } 3642 } 3643 3644 return true; 3645 } 3646 3647 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3648 // Start with second operand because SMEM Offset cannot be dst or src0. 3649 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3650 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3651 if (Op.isSMEMOffset()) 3652 return Op.getStartLoc(); 3653 } 3654 return getLoc(); 3655 } 3656 3657 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3658 const OperandVector &Operands) { 3659 if (isCI() || isSI()) 3660 return true; 3661 3662 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3663 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3664 return true; 3665 3666 auto Opcode = Inst.getOpcode(); 3667 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3668 if (OpNum == -1) 3669 return true; 3670 3671 const auto &Op = Inst.getOperand(OpNum); 3672 if (!Op.isImm()) 3673 return true; 3674 3675 uint64_t Offset = Op.getImm(); 3676 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3677 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3678 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3679 return true; 3680 3681 Error(getSMEMOffsetLoc(Operands), 3682 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3683 "expected a 21-bit signed offset"); 3684 3685 return false; 3686 } 3687 3688 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3689 unsigned Opcode = Inst.getOpcode(); 3690 const MCInstrDesc &Desc = MII.get(Opcode); 3691 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3692 return true; 3693 3694 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3695 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3696 3697 const int OpIndices[] = { Src0Idx, Src1Idx }; 3698 3699 unsigned NumExprs = 0; 3700 unsigned NumLiterals = 0; 3701 uint32_t LiteralValue; 3702 3703 for (int OpIdx : OpIndices) { 3704 if (OpIdx == -1) break; 3705 3706 const MCOperand &MO = Inst.getOperand(OpIdx); 3707 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3708 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3709 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3710 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3711 if (NumLiterals == 0 || LiteralValue != Value) { 3712 LiteralValue = Value; 3713 ++NumLiterals; 3714 } 3715 } else if (MO.isExpr()) { 3716 ++NumExprs; 3717 } 3718 } 3719 } 3720 3721 return NumLiterals + NumExprs <= 1; 3722 } 3723 3724 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3725 const unsigned Opc = Inst.getOpcode(); 3726 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3727 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3728 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3729 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3730 3731 if (OpSel & ~3) 3732 return false; 3733 } 3734 return true; 3735 } 3736 3737 // Check if VCC register matches wavefront size 3738 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3739 auto FB = getFeatureBits(); 3740 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3741 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3742 } 3743 3744 // VOP3 literal is only allowed in GFX10+ and only one can be used 3745 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3746 const OperandVector &Operands) { 3747 unsigned Opcode = Inst.getOpcode(); 3748 const MCInstrDesc &Desc = MII.get(Opcode); 3749 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3750 return true; 3751 3752 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3753 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3754 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3755 3756 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3757 3758 unsigned NumExprs = 0; 3759 unsigned NumLiterals = 0; 3760 uint32_t LiteralValue; 3761 3762 for (int OpIdx : OpIndices) { 3763 if (OpIdx == -1) break; 3764 3765 const MCOperand &MO = Inst.getOperand(OpIdx); 3766 if (!MO.isImm() && !MO.isExpr()) 3767 continue; 3768 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3769 continue; 3770 3771 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3772 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3773 Error(getConstLoc(Operands), 3774 "inline constants are not allowed for this operand"); 3775 return false; 3776 } 3777 3778 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3779 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3780 if (NumLiterals == 0 || LiteralValue != Value) { 3781 LiteralValue = Value; 3782 ++NumLiterals; 3783 } 3784 } else if (MO.isExpr()) { 3785 ++NumExprs; 3786 } 
3787 } 3788 NumLiterals += NumExprs; 3789 3790 if (!NumLiterals) 3791 return true; 3792 3793 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3794 Error(getLitLoc(Operands), "literal operands are not supported"); 3795 return false; 3796 } 3797 3798 if (NumLiterals > 1) { 3799 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3800 return false; 3801 } 3802 3803 return true; 3804 } 3805 3806 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3807 const OperandVector &Operands, 3808 const SMLoc &IDLoc) { 3809 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3810 AMDGPU::OpName::glc1); 3811 if (GLCPos != -1) { 3812 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3813 // in the asm string, and the default value means it is not present. 3814 if (Inst.getOperand(GLCPos).getImm() == -1) { 3815 Error(IDLoc, "instruction must use glc"); 3816 return false; 3817 } 3818 } 3819 3820 return true; 3821 } 3822 3823 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3824 const SMLoc &IDLoc, 3825 const OperandVector &Operands) { 3826 if (!validateLdsDirect(Inst)) { 3827 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 3828 "invalid use of lds_direct"); 3829 return false; 3830 } 3831 if (!validateSOPLiteral(Inst)) { 3832 Error(getLitLoc(Operands), 3833 "only one literal operand is allowed"); 3834 return false; 3835 } 3836 if (!validateVOP3Literal(Inst, Operands)) { 3837 return false; 3838 } 3839 if (!validateConstantBusLimitations(Inst, Operands)) { 3840 return false; 3841 } 3842 if (!validateEarlyClobberLimitations(Inst, Operands)) { 3843 return false; 3844 } 3845 if (!validateIntClampSupported(Inst)) { 3846 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 3847 "integer clamping is not supported on this GPU"); 3848 return false; 3849 } 3850 if (!validateOpSel(Inst)) { 3851 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 3852 "invalid op_sel operand"); 3853 return false; 3854 } 3855 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
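// Illustrative note (not from the original source): the d16 check below
// rejects, e.g., an image_load written with the d16 modifier when assembling
// for SI/CI, producing "d16 modifier is not supported on this GPU".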
3856 if (!validateMIMGD16(Inst)) { 3857 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 3858 "d16 modifier is not supported on this GPU"); 3859 return false; 3860 } 3861 if (!validateMIMGDim(Inst)) { 3862 Error(IDLoc, "dim modifier is required on this GPU"); 3863 return false; 3864 } 3865 if (!validateMIMGDataSize(Inst)) { 3866 Error(IDLoc, 3867 "image data size does not match dmask and tfe"); 3868 return false; 3869 } 3870 if (!validateMIMGAddrSize(Inst)) { 3871 Error(IDLoc, 3872 "image address size does not match dim and a16"); 3873 return false; 3874 } 3875 if (!validateMIMGAtomicDMask(Inst)) { 3876 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3877 "invalid atomic image dmask"); 3878 return false; 3879 } 3880 if (!validateMIMGGatherDMask(Inst)) { 3881 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3882 "invalid image_gather dmask: only one bit must be set"); 3883 return false; 3884 } 3885 if (!validateMovrels(Inst, Operands)) { 3886 return false; 3887 } 3888 if (!validateFlatOffset(Inst, Operands)) { 3889 return false; 3890 } 3891 if (!validateSMEMOffset(Inst, Operands)) { 3892 return false; 3893 } 3894 if (!validateMAIAccWrite(Inst, Operands)) { 3895 return false; 3896 } 3897 if (!validateDivScale(Inst)) { 3898 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 3899 return false; 3900 } 3901 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3902 return false; 3903 } 3904 3905 return true; 3906 } 3907 3908 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3909 const FeatureBitset &FBS, 3910 unsigned VariantID = 0); 3911 3912 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3913 const FeatureBitset &AvailableFeatures, 3914 unsigned VariantID); 3915 3916 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3917 const FeatureBitset &FBS) { 3918 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3919 } 3920 3921 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3922 const FeatureBitset &FBS, 3923 ArrayRef<unsigned> Variants) { 3924 for (auto Variant : Variants) { 3925 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3926 return true; 3927 } 3928 3929 return false; 3930 } 3931 3932 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3933 const SMLoc &IDLoc) { 3934 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3935 3936 // Check if requested instruction variant is supported. 3937 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3938 return false; 3939 3940 // This instruction is not supported. 3941 // Clear any other pending errors because they are no longer relevant. 3942 getParser().clearPendingErrors(); 3943 3944 // Requested instruction variant is not supported. 3945 // Check if any other variants are supported. 3946 StringRef VariantName = getMatchedVariantName(); 3947 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3948 return Error(IDLoc, 3949 Twine(VariantName, 3950 " variant of this instruction is not supported")); 3951 } 3952 3953 // Finally check if this instruction is supported on any other GPU. 3954 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3955 return Error(IDLoc, "instruction not supported on this GPU"); 3956 } 3957 3958 // Instruction not supported on any GPU. Probably a typo. 
3959 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3960 return Error(IDLoc, "invalid instruction" + Suggestion); 3961 } 3962 3963 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3964 OperandVector &Operands, 3965 MCStreamer &Out, 3966 uint64_t &ErrorInfo, 3967 bool MatchingInlineAsm) { 3968 MCInst Inst; 3969 unsigned Result = Match_Success; 3970 for (auto Variant : getMatchedVariants()) { 3971 uint64_t EI; 3972 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3973 Variant); 3974 // Match statuses are ordered from least to most specific; the most 3975 // specific status is used as the result: 3976 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3977 if ((R == Match_Success) || 3978 (R == Match_PreferE32) || 3979 (R == Match_MissingFeature && Result != Match_PreferE32) || 3980 (R == Match_InvalidOperand && Result != Match_MissingFeature 3981 && Result != Match_PreferE32) || 3982 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3983 && Result != Match_MissingFeature 3984 && Result != Match_PreferE32)) { 3985 Result = R; 3986 ErrorInfo = EI; 3987 } 3988 if (R == Match_Success) 3989 break; 3990 } 3991 3992 if (Result == Match_Success) { 3993 if (!validateInstruction(Inst, IDLoc, Operands)) { 3994 return true; 3995 } 3996 Inst.setLoc(IDLoc); 3997 Out.emitInstruction(Inst, getSTI()); 3998 return false; 3999 } 4000 4001 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4002 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4003 return true; 4004 } 4005 4006 switch (Result) { 4007 default: break; 4008 case Match_MissingFeature: 4009 // It has been verified that the specified instruction 4010 // mnemonic is valid. A match was found but it requires 4011 // features which are not supported on this GPU.
4012 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4013 4014 case Match_InvalidOperand: { 4015 SMLoc ErrorLoc = IDLoc; 4016 if (ErrorInfo != ~0ULL) { 4017 if (ErrorInfo >= Operands.size()) { 4018 return Error(IDLoc, "too few operands for instruction"); 4019 } 4020 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4021 if (ErrorLoc == SMLoc()) 4022 ErrorLoc = IDLoc; 4023 } 4024 return Error(ErrorLoc, "invalid operand for instruction"); 4025 } 4026 4027 case Match_PreferE32: 4028 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4029 "should be encoded as e32"); 4030 case Match_MnemonicFail: 4031 llvm_unreachable("Invalid instructions should have been handled already"); 4032 } 4033 llvm_unreachable("Implement any new match types added!"); 4034 } 4035 4036 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4037 int64_t Tmp = -1; 4038 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4039 return true; 4040 } 4041 if (getParser().parseAbsoluteExpression(Tmp)) { 4042 return true; 4043 } 4044 Ret = static_cast<uint32_t>(Tmp); 4045 return false; 4046 } 4047 4048 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4049 uint32_t &Minor) { 4050 if (ParseAsAbsoluteExpression(Major)) 4051 return TokError("invalid major version"); 4052 4053 if (!trySkipToken(AsmToken::Comma)) 4054 return TokError("minor version number required, comma expected"); 4055 4056 if (ParseAsAbsoluteExpression(Minor)) 4057 return TokError("invalid minor version"); 4058 4059 return false; 4060 } 4061 4062 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4063 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4064 return TokError("directive only supported for amdgcn architecture"); 4065 4066 std::string Target; 4067 4068 SMLoc TargetStart = getLoc(); 4069 if (getParser().parseEscapedString(Target)) 4070 return true; 4071 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4072 4073 std::string ExpectedTarget; 4074 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4075 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4076 4077 if (Target != ExpectedTargetOS.str()) 4078 return Error(TargetRange.Start, "target must match options", TargetRange); 4079 4080 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4081 return false; 4082 } 4083 4084 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4085 return Error(Range.Start, "value out of range", Range); 4086 } 4087 4088 bool AMDGPUAsmParser::calculateGPRBlocks( 4089 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4090 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4091 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4092 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4093 // TODO(scott.linder): These calculations are duplicated from 4094 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
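// Illustrative example (not from the original source), assuming a 4-register
// VGPR encoding granule (e.g. wave64): .amdhsa_next_free_vgpr 42 rounds up to
// 44 VGPRs and is encoded as a granulated count of 44/4 - 1 = 10.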
4095 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4096 4097 unsigned NumVGPRs = NextFreeVGPR; 4098 unsigned NumSGPRs = NextFreeSGPR; 4099 4100 if (Version.Major >= 10) 4101 NumSGPRs = 0; 4102 else { 4103 unsigned MaxAddressableNumSGPRs = 4104 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4105 4106 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4107 NumSGPRs > MaxAddressableNumSGPRs) 4108 return OutOfRangeError(SGPRRange); 4109 4110 NumSGPRs += 4111 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4112 4113 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4114 NumSGPRs > MaxAddressableNumSGPRs) 4115 return OutOfRangeError(SGPRRange); 4116 4117 if (Features.test(FeatureSGPRInitBug)) 4118 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4119 } 4120 4121 VGPRBlocks = 4122 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4123 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4124 4125 return false; 4126 } 4127 4128 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4129 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4130 return TokError("directive only supported for amdgcn architecture"); 4131 4132 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4133 return TokError("directive only supported for amdhsa OS"); 4134 4135 StringRef KernelName; 4136 if (getParser().parseIdentifier(KernelName)) 4137 return true; 4138 4139 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4140 4141 StringSet<> Seen; 4142 4143 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4144 4145 SMRange VGPRRange; 4146 uint64_t NextFreeVGPR = 0; 4147 SMRange SGPRRange; 4148 uint64_t NextFreeSGPR = 0; 4149 unsigned UserSGPRCount = 0; 4150 bool ReserveVCC = true; 4151 bool ReserveFlatScr = true; 4152 bool ReserveXNACK = hasXNACK(); 4153 Optional<bool> EnableWavefrontSize32; 4154 4155 while (true) { 4156 while (trySkipToken(AsmToken::EndOfStatement)); 4157 4158 StringRef ID; 4159 SMRange IDRange = getTok().getLocRange(); 4160 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4161 return true; 4162 4163 if (ID == ".end_amdhsa_kernel") 4164 break; 4165 4166 if (Seen.find(ID) != Seen.end()) 4167 return TokError(".amdhsa_ directives cannot be repeated"); 4168 Seen.insert(ID); 4169 4170 SMLoc ValStart = getLoc(); 4171 int64_t IVal; 4172 if (getParser().parseAbsoluteExpression(IVal)) 4173 return true; 4174 SMLoc ValEnd = getLoc(); 4175 SMRange ValRange = SMRange(ValStart, ValEnd); 4176 4177 if (IVal < 0) 4178 return OutOfRangeError(ValRange); 4179 4180 uint64_t Val = IVal; 4181 4182 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4183 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4184 return OutOfRangeError(RANGE); \ 4185 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4186 4187 if (ID == ".amdhsa_group_segment_fixed_size") { 4188 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4189 return OutOfRangeError(ValRange); 4190 KD.group_segment_fixed_size = Val; 4191 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4192 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4193 return OutOfRangeError(ValRange); 4194 KD.private_segment_fixed_size = Val; 4195 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4196 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4197 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4198 Val, ValRange); 4199 if (Val) 4200 UserSGPRCount += 4; 4201 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4202 
PARSE_BITS_ENTRY(KD.kernel_code_properties, 4203 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4204 ValRange); 4205 if (Val) 4206 UserSGPRCount += 2; 4207 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4208 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4209 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4210 ValRange); 4211 if (Val) 4212 UserSGPRCount += 2; 4213 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4214 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4215 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4216 Val, ValRange); 4217 if (Val) 4218 UserSGPRCount += 2; 4219 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4220 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4221 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4222 ValRange); 4223 if (Val) 4224 UserSGPRCount += 2; 4225 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4226 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4227 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4228 ValRange); 4229 if (Val) 4230 UserSGPRCount += 2; 4231 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4232 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4234 Val, ValRange); 4235 if (Val) 4236 UserSGPRCount += 1; 4237 } else if (ID == ".amdhsa_wavefront_size32") { 4238 if (IVersion.Major < 10) 4239 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4240 EnableWavefrontSize32 = Val; 4241 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4242 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4243 Val, ValRange); 4244 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4245 PARSE_BITS_ENTRY( 4246 KD.compute_pgm_rsrc2, 4247 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4248 ValRange); 4249 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4250 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4251 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4252 ValRange); 4253 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4254 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4255 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4256 ValRange); 4257 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4258 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4259 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4260 ValRange); 4261 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4262 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4263 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4264 ValRange); 4265 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4266 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4267 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4268 ValRange); 4269 } else if (ID == ".amdhsa_next_free_vgpr") { 4270 VGPRRange = ValRange; 4271 NextFreeVGPR = Val; 4272 } else if (ID == ".amdhsa_next_free_sgpr") { 4273 SGPRRange = ValRange; 4274 NextFreeSGPR = Val; 4275 } else if (ID == ".amdhsa_reserve_vcc") { 4276 if (!isUInt<1>(Val)) 4277 return OutOfRangeError(ValRange); 4278 ReserveVCC = Val; 4279 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4280 if (IVersion.Major < 7) 4281 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4282 if (!isUInt<1>(Val)) 4283 return OutOfRangeError(ValRange); 4284 ReserveFlatScr = Val; 4285 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4286 if (IVersion.Major < 8) 4287 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4288 if (!isUInt<1>(Val)) 4289 return OutOfRangeError(ValRange); 4290 ReserveXNACK = Val; 4291 } else if (ID == 
".amdhsa_float_round_mode_32") { 4292 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4293 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4294 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4295 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4296 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4297 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4298 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4299 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4300 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4301 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4302 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4303 ValRange); 4304 } else if (ID == ".amdhsa_dx10_clamp") { 4305 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4306 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4307 } else if (ID == ".amdhsa_ieee_mode") { 4308 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4309 Val, ValRange); 4310 } else if (ID == ".amdhsa_fp16_overflow") { 4311 if (IVersion.Major < 9) 4312 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4313 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4314 ValRange); 4315 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4316 if (IVersion.Major < 10) 4317 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4318 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4319 ValRange); 4320 } else if (ID == ".amdhsa_memory_ordered") { 4321 if (IVersion.Major < 10) 4322 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4323 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4324 ValRange); 4325 } else if (ID == ".amdhsa_forward_progress") { 4326 if (IVersion.Major < 10) 4327 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4328 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4329 ValRange); 4330 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4331 PARSE_BITS_ENTRY( 4332 KD.compute_pgm_rsrc2, 4333 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4334 ValRange); 4335 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4336 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4337 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4338 Val, ValRange); 4339 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4340 PARSE_BITS_ENTRY( 4341 KD.compute_pgm_rsrc2, 4342 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4343 ValRange); 4344 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4345 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4346 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4347 Val, ValRange); 4348 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4349 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4350 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4351 Val, ValRange); 4352 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4353 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4354 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4355 Val, ValRange); 4356 } else if (ID == ".amdhsa_exception_int_div_zero") { 4357 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4358 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4359 Val, ValRange); 4360 } else { 4361 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4362 } 4363 4364 #undef PARSE_BITS_ENTRY 4365 } 4366 4367 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4368 return TokError(".amdhsa_next_free_vgpr directive is required"); 4369 4370 
if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4371 return TokError(".amdhsa_next_free_sgpr directive is required"); 4372 4373 unsigned VGPRBlocks; 4374 unsigned SGPRBlocks; 4375 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4376 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4377 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4378 SGPRBlocks)) 4379 return true; 4380 4381 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4382 VGPRBlocks)) 4383 return OutOfRangeError(VGPRRange); 4384 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4385 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4386 4387 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4388 SGPRBlocks)) 4389 return OutOfRangeError(SGPRRange); 4390 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4391 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4392 SGPRBlocks); 4393 4394 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4395 return TokError("too many user SGPRs enabled"); 4396 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4397 UserSGPRCount); 4398 4399 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4400 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4401 ReserveFlatScr, ReserveXNACK); 4402 return false; 4403 } 4404 4405 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4406 uint32_t Major; 4407 uint32_t Minor; 4408 4409 if (ParseDirectiveMajorMinor(Major, Minor)) 4410 return true; 4411 4412 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4413 return false; 4414 } 4415 4416 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4417 uint32_t Major; 4418 uint32_t Minor; 4419 uint32_t Stepping; 4420 StringRef VendorName; 4421 StringRef ArchName; 4422 4423 // If this directive has no arguments, then use the ISA version for the 4424 // targeted GPU. 4425 if (isToken(AsmToken::EndOfStatement)) { 4426 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4427 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4428 ISA.Stepping, 4429 "AMD", "AMDGPU"); 4430 return false; 4431 } 4432 4433 if (ParseDirectiveMajorMinor(Major, Minor)) 4434 return true; 4435 4436 if (!trySkipToken(AsmToken::Comma)) 4437 return TokError("stepping version number required, comma expected"); 4438 4439 if (ParseAsAbsoluteExpression(Stepping)) 4440 return TokError("invalid stepping version"); 4441 4442 if (!trySkipToken(AsmToken::Comma)) 4443 return TokError("vendor name required, comma expected"); 4444 4445 if (!parseString(VendorName, "invalid vendor name")) 4446 return true; 4447 4448 if (!trySkipToken(AsmToken::Comma)) 4449 return TokError("arch name required, comma expected"); 4450 4451 if (!parseString(ArchName, "invalid arch name")) 4452 return true; 4453 4454 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4455 VendorName, ArchName); 4456 return false; 4457 } 4458 4459 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4460 amd_kernel_code_t &Header) { 4461 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4462 // assembly for backwards compatibility. 
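// Illustrative note (not from the original source): values reach this point
// as "key = value" lines inside an .amd_kernel_code_t block, e.g.
//   wavefront_size = 6
// The deprecated key handled below is simply consumed and ignored.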
4463 if (ID == "max_scratch_backing_memory_byte_size") { 4464 Parser.eatToEndOfStatement(); 4465 return false; 4466 } 4467 4468 SmallString<40> ErrStr; 4469 raw_svector_ostream Err(ErrStr); 4470 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4471 return TokError(Err.str()); 4472 } 4473 Lex(); 4474 4475 if (ID == "enable_wavefront_size32") { 4476 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4477 if (!isGFX10Plus()) 4478 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4479 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4480 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4481 } else { 4482 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4483 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4484 } 4485 } 4486 4487 if (ID == "wavefront_size") { 4488 if (Header.wavefront_size == 5) { 4489 if (!isGFX10Plus()) 4490 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4491 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4492 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4493 } else if (Header.wavefront_size == 6) { 4494 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4495 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4496 } 4497 } 4498 4499 if (ID == "enable_wgp_mode") { 4500 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4501 !isGFX10Plus()) 4502 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4503 } 4504 4505 if (ID == "enable_mem_ordered") { 4506 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4507 !isGFX10Plus()) 4508 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4509 } 4510 4511 if (ID == "enable_fwd_progress") { 4512 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4513 !isGFX10Plus()) 4514 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4515 } 4516 4517 return false; 4518 } 4519 4520 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4521 amd_kernel_code_t Header; 4522 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4523 4524 while (true) { 4525 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4526 // will set the current token to EndOfStatement. 
4527 while(trySkipToken(AsmToken::EndOfStatement)); 4528 4529 StringRef ID; 4530 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4531 return true; 4532 4533 if (ID == ".end_amd_kernel_code_t") 4534 break; 4535 4536 if (ParseAMDKernelCodeTValue(ID, Header)) 4537 return true; 4538 } 4539 4540 getTargetStreamer().EmitAMDKernelCodeT(Header); 4541 4542 return false; 4543 } 4544 4545 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4546 StringRef KernelName; 4547 if (!parseId(KernelName, "expected symbol name")) 4548 return true; 4549 4550 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4551 ELF::STT_AMDGPU_HSA_KERNEL); 4552 4553 KernelScope.initialize(getContext()); 4554 return false; 4555 } 4556 4557 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4558 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4559 return Error(getLoc(), 4560 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4561 "architectures"); 4562 } 4563 4564 auto ISAVersionStringFromASM = getToken().getStringContents(); 4565 4566 std::string ISAVersionStringFromSTI; 4567 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4568 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4569 4570 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4571 return Error(getLoc(), 4572 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4573 "arguments specified through the command line"); 4574 } 4575 4576 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4577 Lex(); 4578 4579 return false; 4580 } 4581 4582 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4583 const char *AssemblerDirectiveBegin; 4584 const char *AssemblerDirectiveEnd; 4585 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4586 isHsaAbiVersion3(&getSTI()) 4587 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4588 HSAMD::V3::AssemblerDirectiveEnd) 4589 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4590 HSAMD::AssemblerDirectiveEnd); 4591 4592 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4593 return Error(getLoc(), 4594 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4595 "not available on non-amdhsa OSes")).str()); 4596 } 4597 4598 std::string HSAMetadataString; 4599 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4600 HSAMetadataString)) 4601 return true; 4602 4603 if (isHsaAbiVersion3(&getSTI())) { 4604 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4605 return Error(getLoc(), "invalid HSA metadata"); 4606 } else { 4607 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4608 return Error(getLoc(), "invalid HSA metadata"); 4609 } 4610 4611 return false; 4612 } 4613 4614 /// Common code to parse out a block of text (typically YAML) between start and 4615 /// end directives. 
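/// Illustrative sketch (not from the original source) of the expected layout,
/// where the actual directive names depend on the caller:
///   <AssemblerDirectiveBegin>
///     ... free-form text, collected verbatim ...
///   <AssemblerDirectiveEnd>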
4616 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4617 const char *AssemblerDirectiveEnd, 4618 std::string &CollectString) { 4619 4620 raw_string_ostream CollectStream(CollectString); 4621 4622 getLexer().setSkipSpace(false); 4623 4624 bool FoundEnd = false; 4625 while (!isToken(AsmToken::Eof)) { 4626 while (isToken(AsmToken::Space)) { 4627 CollectStream << getTokenStr(); 4628 Lex(); 4629 } 4630 4631 if (trySkipId(AssemblerDirectiveEnd)) { 4632 FoundEnd = true; 4633 break; 4634 } 4635 4636 CollectStream << Parser.parseStringToEndOfStatement() 4637 << getContext().getAsmInfo()->getSeparatorString(); 4638 4639 Parser.eatToEndOfStatement(); 4640 } 4641 4642 getLexer().setSkipSpace(true); 4643 4644 if (isToken(AsmToken::Eof) && !FoundEnd) { 4645 return TokError(Twine("expected directive ") + 4646 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4647 } 4648 4649 CollectStream.flush(); 4650 return false; 4651 } 4652 4653 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4654 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4655 std::string String; 4656 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4657 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4658 return true; 4659 4660 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4661 if (!PALMetadata->setFromString(String)) 4662 return Error(getLoc(), "invalid PAL metadata"); 4663 return false; 4664 } 4665 4666 /// Parse the assembler directive for old linear-format PAL metadata. 4667 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4668 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4669 return Error(getLoc(), 4670 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4671 "not available on non-amdpal OSes")).str()); 4672 } 4673 4674 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4675 PALMetadata->setLegacy(); 4676 for (;;) { 4677 uint32_t Key, Value; 4678 if (ParseAsAbsoluteExpression(Key)) { 4679 return TokError(Twine("invalid value in ") + 4680 Twine(PALMD::AssemblerDirective)); 4681 } 4682 if (!trySkipToken(AsmToken::Comma)) { 4683 return TokError(Twine("expected an even number of values in ") + 4684 Twine(PALMD::AssemblerDirective)); 4685 } 4686 if (ParseAsAbsoluteExpression(Value)) { 4687 return TokError(Twine("invalid value in ") + 4688 Twine(PALMD::AssemblerDirective)); 4689 } 4690 PALMetadata->setRegister(Key, Value); 4691 if (!trySkipToken(AsmToken::Comma)) 4692 break; 4693 } 4694 return false; 4695 } 4696 4697 /// ParseDirectiveAMDGPULDS 4698 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4699 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4700 if (getParser().checkForValidSection()) 4701 return true; 4702 4703 StringRef Name; 4704 SMLoc NameLoc = getLoc(); 4705 if (getParser().parseIdentifier(Name)) 4706 return TokError("expected identifier in directive"); 4707 4708 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4709 if (parseToken(AsmToken::Comma, "expected ','")) 4710 return true; 4711 4712 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4713 4714 int64_t Size; 4715 SMLoc SizeLoc = getLoc(); 4716 if (getParser().parseAbsoluteExpression(Size)) 4717 return true; 4718 if (Size < 0) 4719 return Error(SizeLoc, "size must be non-negative"); 4720 if (Size > LocalMemorySize) 4721 return Error(SizeLoc, "size is too large"); 4722 4723 int64_t Alignment = 4; 4724 if (trySkipToken(AsmToken::Comma)) { 4725 SMLoc AlignLoc = getLoc(); 4726 if 
(getParser().parseAbsoluteExpression(Alignment)) 4727 return true; 4728 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4729 return Error(AlignLoc, "alignment must be a power of two"); 4730 4731 // Alignment larger than the size of LDS is possible in theory, as long 4732 // as the linker manages to place the symbol at address 0, but we do want 4733 // to make sure the alignment fits nicely into a 32-bit integer. 4734 if (Alignment >= 1u << 31) 4735 return Error(AlignLoc, "alignment is too large"); 4736 } 4737 4738 if (parseToken(AsmToken::EndOfStatement, 4739 "unexpected token in '.amdgpu_lds' directive")) 4740 return true; 4741 4742 Symbol->redefineIfPossible(); 4743 if (!Symbol->isUndefined()) 4744 return Error(NameLoc, "invalid symbol redefinition"); 4745 4746 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4747 return false; 4748 } 4749 4750 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4751 StringRef IDVal = DirectiveID.getString(); 4752 4753 if (isHsaAbiVersion3(&getSTI())) { 4754 if (IDVal == ".amdgcn_target") 4755 return ParseDirectiveAMDGCNTarget(); 4756 4757 if (IDVal == ".amdhsa_kernel") 4758 return ParseDirectiveAMDHSAKernel(); 4759 4760 // TODO: Restructure/combine with PAL metadata directive. 4761 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4762 return ParseDirectiveHSAMetadata(); 4763 } else { 4764 if (IDVal == ".hsa_code_object_version") 4765 return ParseDirectiveHSACodeObjectVersion(); 4766 4767 if (IDVal == ".hsa_code_object_isa") 4768 return ParseDirectiveHSACodeObjectISA(); 4769 4770 if (IDVal == ".amd_kernel_code_t") 4771 return ParseDirectiveAMDKernelCodeT(); 4772 4773 if (IDVal == ".amdgpu_hsa_kernel") 4774 return ParseDirectiveAMDGPUHsaKernel(); 4775 4776 if (IDVal == ".amd_amdgpu_isa") 4777 return ParseDirectiveISAVersion(); 4778 4779 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4780 return ParseDirectiveHSAMetadata(); 4781 } 4782 4783 if (IDVal == ".amdgpu_lds") 4784 return ParseDirectiveAMDGPULDS(); 4785 4786 if (IDVal == PALMD::AssemblerDirectiveBegin) 4787 return ParseDirectivePALMetadataBegin(); 4788 4789 if (IDVal == PALMD::AssemblerDirective) 4790 return ParseDirectivePALMetadata(); 4791 4792 return true; 4793 } 4794 4795 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4796 unsigned RegNo) const { 4797 4798 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4799 R.isValid(); ++R) { 4800 if (*R == RegNo) 4801 return isGFX9Plus(); 4802 } 4803 4804 // GFX10 has 2 more SGPRs 104 and 105. 4805 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4806 R.isValid(); ++R) { 4807 if (*R == RegNo) 4808 return hasSGPR104_SGPR105(); 4809 } 4810 4811 switch (RegNo) { 4812 case AMDGPU::SRC_SHARED_BASE: 4813 case AMDGPU::SRC_SHARED_LIMIT: 4814 case AMDGPU::SRC_PRIVATE_BASE: 4815 case AMDGPU::SRC_PRIVATE_LIMIT: 4816 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4817 return isGFX9Plus(); 4818 case AMDGPU::TBA: 4819 case AMDGPU::TBA_LO: 4820 case AMDGPU::TBA_HI: 4821 case AMDGPU::TMA: 4822 case AMDGPU::TMA_LO: 4823 case AMDGPU::TMA_HI: 4824 return !isGFX9Plus(); 4825 case AMDGPU::XNACK_MASK: 4826 case AMDGPU::XNACK_MASK_LO: 4827 case AMDGPU::XNACK_MASK_HI: 4828 return (isVI() || isGFX9()) && hasXNACK(); 4829 case AMDGPU::SGPR_NULL: 4830 return isGFX10Plus(); 4831 default: 4832 break; 4833 } 4834 4835 if (isCI()) 4836 return true; 4837 4838 if (isSI() || isGFX10Plus()) { 4839 // No flat_scr on SI.
4840 // On GFX10 flat scratch is not a valid register operand and can only be 4841 // accessed with s_setreg/s_getreg. 4842 switch (RegNo) { 4843 case AMDGPU::FLAT_SCR: 4844 case AMDGPU::FLAT_SCR_LO: 4845 case AMDGPU::FLAT_SCR_HI: 4846 return false; 4847 default: 4848 return true; 4849 } 4850 } 4851 4852 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4853 // SI/CI have. 4854 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4855 R.isValid(); ++R) { 4856 if (*R == RegNo) 4857 return hasSGPR102_SGPR103(); 4858 } 4859 4860 return true; 4861 } 4862 4863 OperandMatchResultTy 4864 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4865 OperandMode Mode) { 4866 // Try to parse with a custom parser 4867 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4868 4869 // If we successfully parsed the operand or if there was an error parsing, 4870 // we are done. 4871 // 4872 // If we are parsing after we reach EndOfStatement then this means we 4873 // are appending default values to the Operands list. This is only done 4874 // by a custom parser, so we shouldn't continue on to the generic parsing. 4875 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4876 isToken(AsmToken::EndOfStatement)) 4877 return ResTy; 4878 4879 SMLoc RBraceLoc; 4880 SMLoc LBraceLoc = getLoc(); 4881 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 4882 unsigned Prefix = Operands.size(); 4883 4884 for (;;) { 4885 ResTy = parseReg(Operands); 4886 if (ResTy != MatchOperand_Success) 4887 return ResTy; 4888 4889 RBraceLoc = getLoc(); 4890 if (trySkipToken(AsmToken::RBrac)) 4891 break; 4892 4893 if (!trySkipToken(AsmToken::Comma)) 4894 return MatchOperand_ParseFail; 4895 } 4896 4897 if (Operands.size() - Prefix > 1) { 4898 Operands.insert(Operands.begin() + Prefix, 4899 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4900 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 4901 } 4902 4903 return MatchOperand_Success; 4904 } 4905 4906 return parseRegOrImm(Operands); 4907 } 4908 4909 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4910 // Clear any forced encodings from the previous instruction.
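// Illustrative note (not from the original source): a trailing encoding
// suffix is stripped and remembered as a forced encoding, e.g.
// "v_add_f32_e64" is matched as "v_add_f32" with a forced 64-bit (VOP3)
// encoding, and "v_mov_b32_sdwa" as "v_mov_b32" with SDWA forced.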
4911 setForcedEncodingSize(0); 4912 setForcedDPP(false); 4913 setForcedSDWA(false); 4914 4915 if (Name.endswith("_e64")) { 4916 setForcedEncodingSize(64); 4917 return Name.substr(0, Name.size() - 4); 4918 } else if (Name.endswith("_e32")) { 4919 setForcedEncodingSize(32); 4920 return Name.substr(0, Name.size() - 4); 4921 } else if (Name.endswith("_dpp")) { 4922 setForcedDPP(true); 4923 return Name.substr(0, Name.size() - 4); 4924 } else if (Name.endswith("_sdwa")) { 4925 setForcedSDWA(true); 4926 return Name.substr(0, Name.size() - 5); 4927 } 4928 return Name; 4929 } 4930 4931 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4932 StringRef Name, 4933 SMLoc NameLoc, OperandVector &Operands) { 4934 // Add the instruction mnemonic 4935 Name = parseMnemonicSuffix(Name); 4936 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4937 4938 bool IsMIMG = Name.startswith("image_"); 4939 4940 while (!trySkipToken(AsmToken::EndOfStatement)) { 4941 OperandMode Mode = OperandMode_Default; 4942 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 4943 Mode = OperandMode_NSA; 4944 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4945 4946 if (Res != MatchOperand_Success) { 4947 checkUnsupportedInstruction(Name, NameLoc); 4948 if (!Parser.hasPendingError()) { 4949 // FIXME: use real operand location rather than the current location. 4950 StringRef Msg = 4951 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4952 "not a valid operand."; 4953 Error(getLoc(), Msg); 4954 } 4955 while (!trySkipToken(AsmToken::EndOfStatement)) { 4956 lex(); 4957 } 4958 return true; 4959 } 4960 4961 // Eat the comma or space if there is one. 4962 trySkipToken(AsmToken::Comma); 4963 } 4964 4965 return false; 4966 } 4967 4968 //===----------------------------------------------------------------------===// 4969 // Utility functions 4970 //===----------------------------------------------------------------------===// 4971 4972 OperandMatchResultTy 4973 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4974 4975 if (!trySkipId(Prefix, AsmToken::Colon)) 4976 return MatchOperand_NoMatch; 4977 4978 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4979 } 4980 4981 OperandMatchResultTy 4982 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4983 AMDGPUOperand::ImmTy ImmTy, 4984 bool (*ConvertResult)(int64_t&)) { 4985 SMLoc S = getLoc(); 4986 int64_t Value = 0; 4987 4988 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4989 if (Res != MatchOperand_Success) 4990 return Res; 4991 4992 if (ConvertResult && !ConvertResult(Value)) { 4993 Error(S, "invalid " + StringRef(Prefix) + " value."); 4994 } 4995 4996 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4997 return MatchOperand_Success; 4998 } 4999 5000 OperandMatchResultTy 5001 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5002 OperandVector &Operands, 5003 AMDGPUOperand::ImmTy ImmTy, 5004 bool (*ConvertResult)(int64_t&)) { 5005 SMLoc S = getLoc(); 5006 if (!trySkipId(Prefix, AsmToken::Colon)) 5007 return MatchOperand_NoMatch; 5008 5009 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5010 return MatchOperand_ParseFail; 5011 5012 unsigned Val = 0; 5013 const unsigned MaxSize = 4; 5014 5015 // FIXME: How to verify the number of elements matches the number of src 5016 // operands? 
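// Illustrative note (not from the original source): this accepts bracketed
// 0/1 lists such as "op_sel:[0,1,1]" or "neg_lo:[1,0]"; element I is packed
// into bit I of Val, and at most MaxSize (4) elements are accepted.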
5017 for (int I = 0; ; ++I) { 5018 int64_t Op; 5019 SMLoc Loc = getLoc(); 5020 if (!parseExpr(Op)) 5021 return MatchOperand_ParseFail; 5022 5023 if (Op != 0 && Op != 1) { 5024 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5025 return MatchOperand_ParseFail; 5026 } 5027 5028 Val |= (Op << I); 5029 5030 if (trySkipToken(AsmToken::RBrac)) 5031 break; 5032 5033 if (I + 1 == MaxSize) { 5034 Error(getLoc(), "expected a closing square bracket"); 5035 return MatchOperand_ParseFail; 5036 } 5037 5038 if (!skipToken(AsmToken::Comma, "expected a comma")) 5039 return MatchOperand_ParseFail; 5040 } 5041 5042 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5043 return MatchOperand_Success; 5044 } 5045 5046 OperandMatchResultTy 5047 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5048 AMDGPUOperand::ImmTy ImmTy) { 5049 int64_t Bit; 5050 SMLoc S = getLoc(); 5051 5052 if (trySkipId(Name)) { 5053 Bit = 1; 5054 } else if (trySkipId("no", Name)) { 5055 Bit = 0; 5056 } else { 5057 return MatchOperand_NoMatch; 5058 } 5059 5060 if (Name == "r128" && !hasMIMG_R128()) { 5061 Error(S, "r128 modifier is not supported on this GPU"); 5062 return MatchOperand_ParseFail; 5063 } 5064 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5065 Error(S, "a16 modifier is not supported on this GPU"); 5066 return MatchOperand_ParseFail; 5067 } 5068 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) { 5069 Error(S, "dlc modifier is not supported on this GPU"); 5070 return MatchOperand_ParseFail; 5071 } 5072 5073 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5074 ImmTy = AMDGPUOperand::ImmTyR128A16; 5075 5076 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5077 return MatchOperand_Success; 5078 } 5079 5080 static void addOptionalImmOperand( 5081 MCInst& Inst, const OperandVector& Operands, 5082 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5083 AMDGPUOperand::ImmTy ImmT, 5084 int64_t Default = 0) { 5085 auto i = OptionalIdx.find(ImmT); 5086 if (i != OptionalIdx.end()) { 5087 unsigned Idx = i->second; 5088 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5089 } else { 5090 Inst.addOperand(MCOperand::createImm(Default)); 5091 } 5092 } 5093 5094 OperandMatchResultTy 5095 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5096 StringRef &Value, 5097 SMLoc &StringLoc) { 5098 if (!trySkipId(Prefix, AsmToken::Colon)) 5099 return MatchOperand_NoMatch; 5100 5101 StringLoc = getLoc(); 5102 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5103 : MatchOperand_ParseFail; 5104 } 5105 5106 //===----------------------------------------------------------------------===// 5107 // MTBUF format 5108 //===----------------------------------------------------------------------===// 5109 5110 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5111 int64_t MaxVal, 5112 int64_t &Fmt) { 5113 int64_t Val; 5114 SMLoc Loc = getLoc(); 5115 5116 auto Res = parseIntWithPrefix(Pref, Val); 5117 if (Res == MatchOperand_ParseFail) 5118 return false; 5119 if (Res == MatchOperand_NoMatch) 5120 return true; 5121 5122 if (Val < 0 || Val > MaxVal) { 5123 Error(Loc, Twine("out of range ", StringRef(Pref))); 5124 return false; 5125 } 5126 5127 Fmt = Val; 5128 return true; 5129 } 5130 5131 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5132 // values to live in a joint format operand in the MCInst encoding. 
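// Illustrative example (not from the original source) of the legacy syntax
// handled here:
//   tbuffer_load_format_x v0, off, s[0:3], dfmt:1, nfmt:2, 0
// Both values are combined into one operand via encodeDfmtNfmt().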
5133 OperandMatchResultTy 5134 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5135 using namespace llvm::AMDGPU::MTBUFFormat; 5136 5137 int64_t Dfmt = DFMT_UNDEF; 5138 int64_t Nfmt = NFMT_UNDEF; 5139 5140 // dfmt and nfmt can appear in either order, and each is optional. 5141 for (int I = 0; I < 2; ++I) { 5142 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5143 return MatchOperand_ParseFail; 5144 5145 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5146 return MatchOperand_ParseFail; 5147 } 5148 // Skip optional comma between dfmt/nfmt 5149 // but guard against 2 commas following each other. 5150 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5151 !peekToken().is(AsmToken::Comma)) { 5152 trySkipToken(AsmToken::Comma); 5153 } 5154 } 5155 5156 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5157 return MatchOperand_NoMatch; 5158 5159 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5160 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5161 5162 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5163 return MatchOperand_Success; 5164 } 5165 5166 OperandMatchResultTy 5167 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5168 using namespace llvm::AMDGPU::MTBUFFormat; 5169 5170 int64_t Fmt = UFMT_UNDEF; 5171 5172 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5173 return MatchOperand_ParseFail; 5174 5175 if (Fmt == UFMT_UNDEF) 5176 return MatchOperand_NoMatch; 5177 5178 Format = Fmt; 5179 return MatchOperand_Success; 5180 } 5181 5182 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5183 int64_t &Nfmt, 5184 StringRef FormatStr, 5185 SMLoc Loc) { 5186 using namespace llvm::AMDGPU::MTBUFFormat; 5187 int64_t Format; 5188 5189 Format = getDfmt(FormatStr); 5190 if (Format != DFMT_UNDEF) { 5191 Dfmt = Format; 5192 return true; 5193 } 5194 5195 Format = getNfmt(FormatStr, getSTI()); 5196 if (Format != NFMT_UNDEF) { 5197 Nfmt = Format; 5198 return true; 5199 } 5200 5201 Error(Loc, "unsupported format"); 5202 return false; 5203 } 5204 5205 OperandMatchResultTy 5206 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5207 SMLoc FormatLoc, 5208 int64_t &Format) { 5209 using namespace llvm::AMDGPU::MTBUFFormat; 5210 5211 int64_t Dfmt = DFMT_UNDEF; 5212 int64_t Nfmt = NFMT_UNDEF; 5213 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5214 return MatchOperand_ParseFail; 5215 5216 if (trySkipToken(AsmToken::Comma)) { 5217 StringRef Str; 5218 SMLoc Loc = getLoc(); 5219 if (!parseId(Str, "expected a format string") || 5220 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5221 return MatchOperand_ParseFail; 5222 } 5223 if (Dfmt == DFMT_UNDEF) { 5224 Error(Loc, "duplicate numeric format"); 5225 return MatchOperand_ParseFail; 5226 } else if (Nfmt == NFMT_UNDEF) { 5227 Error(Loc, "duplicate data format"); 5228 return MatchOperand_ParseFail; 5229 } 5230 } 5231 5232 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5233 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5234 5235 if (isGFX10Plus()) { 5236 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5237 if (Ufmt == UFMT_UNDEF) { 5238 Error(FormatLoc, "unsupported format"); 5239 return MatchOperand_ParseFail; 5240 } 5241 Format = Ufmt; 5242 } else { 5243 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5244 } 5245 5246 return MatchOperand_Success; 5247 } 5248 5249 OperandMatchResultTy 5250 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5251 SMLoc Loc, 5252 int64_t &Format) { 5253 using namespace llvm::AMDGPU::MTBUFFormat; 5254 5255 auto Id = getUnifiedFormat(FormatStr); 5256 if (Id == UFMT_UNDEF) 5257 return MatchOperand_NoMatch; 5258 5259 if (!isGFX10Plus()) { 5260 Error(Loc, "unified format is not supported on this GPU"); 5261 return MatchOperand_ParseFail; 5262 } 5263 5264 Format = Id; 5265 return MatchOperand_Success; 5266 } 5267 5268 OperandMatchResultTy 5269 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5270 using namespace llvm::AMDGPU::MTBUFFormat; 5271 SMLoc Loc = getLoc(); 5272 5273 if (!parseExpr(Format)) 5274 return MatchOperand_ParseFail; 5275 if (!isValidFormatEncoding(Format, getSTI())) { 5276 Error(Loc, "out of range format"); 5277 return MatchOperand_ParseFail; 5278 } 5279 5280 return MatchOperand_Success; 5281 } 5282 5283 OperandMatchResultTy 5284 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5285 using namespace llvm::AMDGPU::MTBUFFormat; 5286 5287 if (!trySkipId("format", AsmToken::Colon)) 5288 return MatchOperand_NoMatch; 5289 5290 if (trySkipToken(AsmToken::LBrac)) { 5291 StringRef FormatStr; 5292 SMLoc Loc = getLoc(); 5293 if (!parseId(FormatStr, "expected a format string")) 5294 return MatchOperand_ParseFail; 5295 5296 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5297 if (Res == MatchOperand_NoMatch) 5298 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5299 if (Res != MatchOperand_Success) 5300 return Res; 5301 5302 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5303 return MatchOperand_ParseFail; 5304 5305 return MatchOperand_Success; 5306 } 5307 5308 return parseNumericFormat(Format); 5309 } 5310 5311 OperandMatchResultTy 5312 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5313 using namespace llvm::AMDGPU::MTBUFFormat; 5314 5315 int64_t Format = getDefaultFormatEncoding(getSTI()); 5316 OperandMatchResultTy Res; 5317 SMLoc Loc = getLoc(); 5318 5319 // Parse legacy format syntax. 5320 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5321 if (Res == MatchOperand_ParseFail) 5322 return Res; 5323 5324 bool FormatFound = (Res == MatchOperand_Success); 5325 5326 Operands.push_back( 5327 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5328 5329 if (FormatFound) 5330 trySkipToken(AsmToken::Comma); 5331 5332 if (isToken(AsmToken::EndOfStatement)) { 5333 // We are expecting an soffset operand, 5334 // but let matcher handle the error. 5335 return MatchOperand_Success; 5336 } 5337 5338 // Parse soffset. 
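// Note that a symbolic or numeric format may also follow soffset; that case is
// handled further below. Illustrative example (GFX10 unified-format syntax, shown
// here only as an assumption about typical usage):
//   tbuffer_load_format_x v0, off, s[0:3], 0 format:[BUF_FMT_32_FLOAT]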
5339 Res = parseRegOrImm(Operands); 5340 if (Res != MatchOperand_Success) 5341 return Res; 5342 5343 trySkipToken(AsmToken::Comma); 5344 5345 if (!FormatFound) { 5346 Res = parseSymbolicOrNumericFormat(Format); 5347 if (Res == MatchOperand_ParseFail) 5348 return Res; 5349 if (Res == MatchOperand_Success) { 5350 auto Size = Operands.size(); 5351 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5352 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5353 Op.setImm(Format); 5354 } 5355 return MatchOperand_Success; 5356 } 5357 5358 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5359 Error(getLoc(), "duplicate format"); 5360 return MatchOperand_ParseFail; 5361 } 5362 return MatchOperand_Success; 5363 } 5364 5365 //===----------------------------------------------------------------------===// 5366 // ds 5367 //===----------------------------------------------------------------------===// 5368 5369 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5370 const OperandVector &Operands) { 5371 OptionalImmIndexMap OptionalIdx; 5372 5373 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5374 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5375 5376 // Add the register arguments 5377 if (Op.isReg()) { 5378 Op.addRegOperands(Inst, 1); 5379 continue; 5380 } 5381 5382 // Handle optional arguments 5383 OptionalIdx[Op.getImmTy()] = i; 5384 } 5385 5386 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5387 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5388 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5389 5390 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5391 } 5392 5393 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5394 bool IsGdsHardcoded) { 5395 OptionalImmIndexMap OptionalIdx; 5396 5397 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5398 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5399 5400 // Add the register arguments 5401 if (Op.isReg()) { 5402 Op.addRegOperands(Inst, 1); 5403 continue; 5404 } 5405 5406 if (Op.isToken() && Op.getToken() == "gds") { 5407 IsGdsHardcoded = true; 5408 continue; 5409 } 5410 5411 // Handle optional arguments 5412 OptionalIdx[Op.getImmTy()] = i; 5413 } 5414 5415 AMDGPUOperand::ImmTy OffsetType = 5416 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5417 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5418 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5419 AMDGPUOperand::ImmTyOffset; 5420 5421 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5422 5423 if (!IsGdsHardcoded) { 5424 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5425 } 5426 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5427 } 5428 5429 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5430 OptionalImmIndexMap OptionalIdx; 5431 5432 unsigned OperandIdx[4]; 5433 unsigned EnMask = 0; 5434 int SrcIdx = 0; 5435 5436 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5437 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5438 5439 // Add the register arguments 5440 if (Op.isReg()) { 5441 assert(SrcIdx < 4); 5442 OperandIdx[SrcIdx] = Inst.size(); 5443 Op.addRegOperands(Inst, 1); 5444 ++SrcIdx; 5445 continue; 5446 } 5447 5448 if (Op.isOff()) { 5449 assert(SrcIdx < 4); 5450 OperandIdx[SrcIdx] = Inst.size(); 5451 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5452 ++SrcIdx; 5453 continue; 5454 } 5455 5456 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5457 Op.addImmOperands(Inst, 1); 5458 continue; 5459 } 5460 5461 if (Op.isToken() && Op.getToken() == "done") 5462 continue; 5463 5464 // Handle optional arguments 5465 OptionalIdx[Op.getImmTy()] = i; 5466 } 5467 5468 assert(SrcIdx == 4); 5469 5470 bool Compr = false; 5471 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5472 Compr = true; 5473 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5474 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5475 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5476 } 5477 5478 for (auto i = 0; i < SrcIdx; ++i) { 5479 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5480 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5481 } 5482 } 5483 5484 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5485 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5486 5487 Inst.addOperand(MCOperand::createImm(EnMask)); 5488 } 5489 5490 //===----------------------------------------------------------------------===// 5491 // s_waitcnt 5492 //===----------------------------------------------------------------------===// 5493 5494 static bool 5495 encodeCnt( 5496 const AMDGPU::IsaVersion ISA, 5497 int64_t &IntVal, 5498 int64_t CntVal, 5499 bool Saturate, 5500 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5501 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5502 { 5503 bool Failed = false; 5504 5505 IntVal = encode(ISA, IntVal, CntVal); 5506 if (CntVal != decode(ISA, IntVal)) { 5507 if (Saturate) { 5508 IntVal = encode(ISA, IntVal, -1); 5509 } else { 5510 Failed = true; 5511 } 5512 } 5513 return Failed; 5514 } 5515 5516 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5517 5518 SMLoc CntLoc = getLoc(); 5519 StringRef CntName = getTokenStr(); 5520 5521 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5522 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5523 return false; 5524 5525 int64_t CntVal; 5526 SMLoc ValLoc = getLoc(); 5527 if (!parseExpr(CntVal)) 5528 return false; 5529 5530 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5531 5532 bool Failed = true; 5533 bool Sat = CntName.endswith("_sat"); 5534 5535 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5536 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5537 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5538 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5539 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5540 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5541 } else { 5542 Error(CntLoc, "invalid counter name " + CntName); 5543 return false; 5544 } 5545 5546 if (Failed) { 5547 Error(ValLoc, "too large value for " + CntName); 5548 return false; 5549 } 5550 5551 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5552 return false; 5553 5554 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5555 if (isToken(AsmToken::EndOfStatement)) { 5556 Error(getLoc(), "expected a counter name"); 5557 return false; 5558 } 5559 } 5560 5561 return true; 5562 } 5563 5564 OperandMatchResultTy 5565 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5566 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5567 int64_t Waitcnt = getWaitcntBitMask(ISA); 5568 SMLoc S = getLoc(); 5569 5570 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5571 while (!isToken(AsmToken::EndOfStatement)) { 5572 if (!parseCnt(Waitcnt)) 5573 return MatchOperand_ParseFail; 5574 } 5575 } else { 5576 if (!parseExpr(Waitcnt)) 5577 return MatchOperand_ParseFail; 5578 } 5579 5580 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5581 return MatchOperand_Success; 5582 } 5583 5584 bool 5585 AMDGPUOperand::isSWaitCnt() const { 5586 return isImm(); 5587 } 5588 5589 //===----------------------------------------------------------------------===// 5590 // hwreg 5591 //===----------------------------------------------------------------------===// 5592 5593 bool 5594 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5595 OperandInfoTy &Offset, 5596 
OperandInfoTy &Width) { 5597 using namespace llvm::AMDGPU::Hwreg; 5598 5599 // The register may be specified by name or using a numeric code 5600 HwReg.Loc = getLoc(); 5601 if (isToken(AsmToken::Identifier) && 5602 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5603 HwReg.IsSymbolic = true; 5604 lex(); // skip register name 5605 } else if (!parseExpr(HwReg.Id, "a register name")) { 5606 return false; 5607 } 5608 5609 if (trySkipToken(AsmToken::RParen)) 5610 return true; 5611 5612 // parse optional params 5613 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5614 return false; 5615 5616 Offset.Loc = getLoc(); 5617 if (!parseExpr(Offset.Id)) 5618 return false; 5619 5620 if (!skipToken(AsmToken::Comma, "expected a comma")) 5621 return false; 5622 5623 Width.Loc = getLoc(); 5624 return parseExpr(Width.Id) && 5625 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5626 } 5627 5628 bool 5629 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5630 const OperandInfoTy &Offset, 5631 const OperandInfoTy &Width) { 5632 5633 using namespace llvm::AMDGPU::Hwreg; 5634 5635 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5636 Error(HwReg.Loc, 5637 "specified hardware register is not supported on this GPU"); 5638 return false; 5639 } 5640 if (!isValidHwreg(HwReg.Id)) { 5641 Error(HwReg.Loc, 5642 "invalid code of hardware register: only 6-bit values are legal"); 5643 return false; 5644 } 5645 if (!isValidHwregOffset(Offset.Id)) { 5646 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5647 return false; 5648 } 5649 if (!isValidHwregWidth(Width.Id)) { 5650 Error(Width.Loc, 5651 "invalid bitfield width: only values from 1 to 32 are legal"); 5652 return false; 5653 } 5654 return true; 5655 } 5656 5657 OperandMatchResultTy 5658 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5659 using namespace llvm::AMDGPU::Hwreg; 5660 5661 int64_t ImmVal = 0; 5662 SMLoc Loc = getLoc(); 5663 5664 if (trySkipId("hwreg", AsmToken::LParen)) { 5665 OperandInfoTy HwReg(ID_UNKNOWN_); 5666 OperandInfoTy Offset(OFFSET_DEFAULT_); 5667 OperandInfoTy Width(WIDTH_DEFAULT_); 5668 if (parseHwregBody(HwReg, Offset, Width) && 5669 validateHwreg(HwReg, Offset, Width)) { 5670 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5671 } else { 5672 return MatchOperand_ParseFail; 5673 } 5674 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5675 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5676 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5677 return MatchOperand_ParseFail; 5678 } 5679 } else { 5680 return MatchOperand_ParseFail; 5681 } 5682 5683 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5684 return MatchOperand_Success; 5685 } 5686 5687 bool AMDGPUOperand::isHwreg() const { 5688 return isImmTy(ImmTyHwreg); 5689 } 5690 5691 //===----------------------------------------------------------------------===// 5692 // sendmsg 5693 //===----------------------------------------------------------------------===// 5694 5695 bool 5696 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5697 OperandInfoTy &Op, 5698 OperandInfoTy &Stream) { 5699 using namespace llvm::AMDGPU::SendMsg; 5700 5701 Msg.Loc = getLoc(); 5702 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5703 Msg.IsSymbolic = true; 5704 lex(); // skip message name 5705 } else if (!parseExpr(Msg.Id, "a message name")) { 5706 return false; 5707 } 5708 5709 if (trySkipToken(AsmToken::Comma)) { 5710 Op.IsDefined = true; 
5711 Op.Loc = getLoc(); 5712 if (isToken(AsmToken::Identifier) && 5713 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5714 lex(); // skip operation name 5715 } else if (!parseExpr(Op.Id, "an operation name")) { 5716 return false; 5717 } 5718 5719 if (trySkipToken(AsmToken::Comma)) { 5720 Stream.IsDefined = true; 5721 Stream.Loc = getLoc(); 5722 if (!parseExpr(Stream.Id)) 5723 return false; 5724 } 5725 } 5726 5727 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5728 } 5729 5730 bool 5731 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5732 const OperandInfoTy &Op, 5733 const OperandInfoTy &Stream) { 5734 using namespace llvm::AMDGPU::SendMsg; 5735 5736 // Validation strictness depends on whether the message is specified 5737 // in a symbolic or in a numeric form. In the latter case, 5738 // only the encoding possibility is checked. 5739 bool Strict = Msg.IsSymbolic; 5740 5741 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5742 Error(Msg.Loc, "invalid message id"); 5743 return false; 5744 } 5745 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5746 if (Op.IsDefined) { 5747 Error(Op.Loc, "message does not support operations"); 5748 } else { 5749 Error(Msg.Loc, "missing message operation"); 5750 } 5751 return false; 5752 } 5753 if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5754 Error(Op.Loc, "invalid operation id"); 5755 return false; 5756 } 5757 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5758 Error(Stream.Loc, "message operation does not support streams"); 5759 return false; 5760 } 5761 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5762 Error(Stream.Loc, "invalid message stream id"); 5763 return false; 5764 } 5765 return true; 5766 } 5767 5768 OperandMatchResultTy 5769 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5770 using namespace llvm::AMDGPU::SendMsg; 5771 5772 int64_t ImmVal = 0; 5773 SMLoc Loc = getLoc(); 5774 5775 if (trySkipId("sendmsg", AsmToken::LParen)) { 5776 OperandInfoTy Msg(ID_UNKNOWN_); 5777 OperandInfoTy Op(OP_NONE_); 5778 OperandInfoTy Stream(STREAM_ID_NONE_); 5779 if (parseSendMsgBody(Msg, Op, Stream) && 5780 validateSendMsg(Msg, Op, Stream)) { 5781 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5782 } else { 5783 return MatchOperand_ParseFail; 5784 } 5785 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 5786 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5787 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5788 return MatchOperand_ParseFail; 5789 } 5790 } else { 5791 return MatchOperand_ParseFail; 5792 } 5793 5794 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5795 return MatchOperand_Success; 5796 } 5797 5798 bool AMDGPUOperand::isSendMsg() const { 5799 return isImmTy(ImmTySendMsg); 5800 } 5801 5802 //===----------------------------------------------------------------------===// 5803 // v_interp 5804 //===----------------------------------------------------------------------===// 5805 5806 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5807 StringRef Str; 5808 SMLoc S = getLoc(); 5809 5810 if (!parseId(Str)) 5811 return MatchOperand_NoMatch; 5812 5813 int Slot = StringSwitch<int>(Str) 5814 .Case("p10", 0) 5815 .Case("p20", 1) 5816 .Case("p0", 2) 5817 .Default(-1); 5818 5819 if (Slot == -1) { 5820 Error(S, "invalid interpolation slot"); 5821 return MatchOperand_ParseFail; 5822 } 5823 5824 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5825 AMDGPUOperand::ImmTyInterpSlot)); 5826
return MatchOperand_Success; 5827 } 5828 5829 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5830 StringRef Str; 5831 SMLoc S = getLoc(); 5832 5833 if (!parseId(Str)) 5834 return MatchOperand_NoMatch; 5835 5836 if (!Str.startswith("attr")) { 5837 Error(S, "invalid interpolation attribute"); 5838 return MatchOperand_ParseFail; 5839 } 5840 5841 StringRef Chan = Str.take_back(2); 5842 int AttrChan = StringSwitch<int>(Chan) 5843 .Case(".x", 0) 5844 .Case(".y", 1) 5845 .Case(".z", 2) 5846 .Case(".w", 3) 5847 .Default(-1); 5848 if (AttrChan == -1) { 5849 Error(S, "invalid or missing interpolation attribute channel"); 5850 return MatchOperand_ParseFail; 5851 } 5852 5853 Str = Str.drop_back(2).drop_front(4); 5854 5855 uint8_t Attr; 5856 if (Str.getAsInteger(10, Attr)) { 5857 Error(S, "invalid or missing interpolation attribute number"); 5858 return MatchOperand_ParseFail; 5859 } 5860 5861 if (Attr > 63) { 5862 Error(S, "out of bounds interpolation attribute number"); 5863 return MatchOperand_ParseFail; 5864 } 5865 5866 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5867 5868 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5869 AMDGPUOperand::ImmTyInterpAttr)); 5870 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5871 AMDGPUOperand::ImmTyAttrChan)); 5872 return MatchOperand_Success; 5873 } 5874 5875 //===----------------------------------------------------------------------===// 5876 // exp 5877 //===----------------------------------------------------------------------===// 5878 5879 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5880 using namespace llvm::AMDGPU::Exp; 5881 5882 StringRef Str; 5883 SMLoc S = getLoc(); 5884 5885 if (!parseId(Str)) 5886 return MatchOperand_NoMatch; 5887 5888 unsigned Id = getTgtId(Str); 5889 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 5890 Error(S, (Id == ET_INVALID) ? 
5891 "invalid exp target" : 5892 "exp target is not supported on this GPU"); 5893 return MatchOperand_ParseFail; 5894 } 5895 5896 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 5897 AMDGPUOperand::ImmTyExpTgt)); 5898 return MatchOperand_Success; 5899 } 5900 5901 //===----------------------------------------------------------------------===// 5902 // parser helpers 5903 //===----------------------------------------------------------------------===// 5904 5905 bool 5906 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5907 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5908 } 5909 5910 bool 5911 AMDGPUAsmParser::isId(const StringRef Id) const { 5912 return isId(getToken(), Id); 5913 } 5914 5915 bool 5916 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5917 return getTokenKind() == Kind; 5918 } 5919 5920 bool 5921 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5922 if (isId(Id)) { 5923 lex(); 5924 return true; 5925 } 5926 return false; 5927 } 5928 5929 bool 5930 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 5931 if (isToken(AsmToken::Identifier)) { 5932 StringRef Tok = getTokenStr(); 5933 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 5934 lex(); 5935 return true; 5936 } 5937 } 5938 return false; 5939 } 5940 5941 bool 5942 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5943 if (isId(Id) && peekToken().is(Kind)) { 5944 lex(); 5945 lex(); 5946 return true; 5947 } 5948 return false; 5949 } 5950 5951 bool 5952 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5953 if (isToken(Kind)) { 5954 lex(); 5955 return true; 5956 } 5957 return false; 5958 } 5959 5960 bool 5961 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5962 const StringRef ErrMsg) { 5963 if (!trySkipToken(Kind)) { 5964 Error(getLoc(), ErrMsg); 5965 return false; 5966 } 5967 return true; 5968 } 5969 5970 bool 5971 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 5972 SMLoc S = getLoc(); 5973 5974 const MCExpr *Expr; 5975 if (Parser.parseExpression(Expr)) 5976 return false; 5977 5978 if (Expr->evaluateAsAbsolute(Imm)) 5979 return true; 5980 5981 if (Expected.empty()) { 5982 Error(S, "expected absolute expression"); 5983 } else { 5984 Error(S, Twine("expected ", Expected) + 5985 Twine(" or an absolute expression")); 5986 } 5987 return false; 5988 } 5989 5990 bool 5991 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5992 SMLoc S = getLoc(); 5993 5994 const MCExpr *Expr; 5995 if (Parser.parseExpression(Expr)) 5996 return false; 5997 5998 int64_t IntVal; 5999 if (Expr->evaluateAsAbsolute(IntVal)) { 6000 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6001 } else { 6002 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6003 } 6004 return true; 6005 } 6006 6007 bool 6008 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6009 if (isToken(AsmToken::String)) { 6010 Val = getToken().getStringContents(); 6011 lex(); 6012 return true; 6013 } else { 6014 Error(getLoc(), ErrMsg); 6015 return false; 6016 } 6017 } 6018 6019 bool 6020 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6021 if (isToken(AsmToken::Identifier)) { 6022 Val = getTokenStr(); 6023 lex(); 6024 return true; 6025 } else { 6026 if (!ErrMsg.empty()) 6027 Error(getLoc(), ErrMsg); 6028 return false; 6029 } 6030 } 6031 6032 AsmToken 6033 AMDGPUAsmParser::getToken() const { 6034 return Parser.getTok(); 6035 } 6036 6037 AsmToken 6038 
AMDGPUAsmParser::peekToken() { 6039 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6040 } 6041 6042 void 6043 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6044 auto TokCount = getLexer().peekTokens(Tokens); 6045 6046 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6047 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6048 } 6049 6050 AsmToken::TokenKind 6051 AMDGPUAsmParser::getTokenKind() const { 6052 return getLexer().getKind(); 6053 } 6054 6055 SMLoc 6056 AMDGPUAsmParser::getLoc() const { 6057 return getToken().getLoc(); 6058 } 6059 6060 StringRef 6061 AMDGPUAsmParser::getTokenStr() const { 6062 return getToken().getString(); 6063 } 6064 6065 void 6066 AMDGPUAsmParser::lex() { 6067 Parser.Lex(); 6068 } 6069 6070 SMLoc 6071 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6072 const OperandVector &Operands) const { 6073 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6074 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6075 if (Test(Op)) 6076 return Op.getStartLoc(); 6077 } 6078 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6079 } 6080 6081 SMLoc 6082 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6083 const OperandVector &Operands) const { 6084 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6085 return getOperandLoc(Test, Operands); 6086 } 6087 6088 SMLoc 6089 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6090 const OperandVector &Operands) const { 6091 auto Test = [=](const AMDGPUOperand& Op) { 6092 return Op.isRegKind() && Op.getReg() == Reg; 6093 }; 6094 return getOperandLoc(Test, Operands); 6095 } 6096 6097 SMLoc 6098 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6099 auto Test = [](const AMDGPUOperand& Op) { 6100 return Op.IsImmKindLiteral() || Op.isExpr(); 6101 }; 6102 return getOperandLoc(Test, Operands); 6103 } 6104 6105 SMLoc 6106 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6107 auto Test = [](const AMDGPUOperand& Op) { 6108 return Op.isImmKindConst(); 6109 }; 6110 return getOperandLoc(Test, Operands); 6111 } 6112 6113 //===----------------------------------------------------------------------===// 6114 // swizzle 6115 //===----------------------------------------------------------------------===// 6116 6117 LLVM_READNONE 6118 static unsigned 6119 encodeBitmaskPerm(const unsigned AndMask, 6120 const unsigned OrMask, 6121 const unsigned XorMask) { 6122 using namespace llvm::AMDGPU::Swizzle; 6123 6124 return BITMASK_PERM_ENC | 6125 (AndMask << BITMASK_AND_SHIFT) | 6126 (OrMask << BITMASK_OR_SHIFT) | 6127 (XorMask << BITMASK_XOR_SHIFT); 6128 } 6129 6130 bool 6131 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6132 const unsigned MinVal, 6133 const unsigned MaxVal, 6134 const StringRef ErrMsg, 6135 SMLoc &Loc) { 6136 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6137 return false; 6138 } 6139 Loc = getLoc(); 6140 if (!parseExpr(Op)) { 6141 return false; 6142 } 6143 if (Op < MinVal || Op > MaxVal) { 6144 Error(Loc, ErrMsg); 6145 return false; 6146 } 6147 6148 return true; 6149 } 6150 6151 bool 6152 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6153 const unsigned MinVal, 6154 const unsigned MaxVal, 6155 const StringRef ErrMsg) { 6156 SMLoc Loc; 6157 for (unsigned i = 0; i < OpNum; ++i) { 6158 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6159 return false; 6160 } 6161 6162 return true; 6163 } 6164 6165 bool 6166 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6167 using namespace llvm::AMDGPU::Swizzle; 6168 6169 int64_t Lane[LANE_NUM]; 6170 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6171 "expected a 2-bit lane id")) { 6172 Imm = QUAD_PERM_ENC; 6173 for (unsigned I = 0; I < LANE_NUM; ++I) { 6174 Imm |= Lane[I] << (LANE_SHIFT * I); 6175 } 6176 return true; 6177 } 6178 return false; 6179 } 6180 6181 bool 6182 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6183 using namespace llvm::AMDGPU::Swizzle; 6184 6185 SMLoc Loc; 6186 int64_t GroupSize; 6187 int64_t LaneIdx; 6188 6189 if (!parseSwizzleOperand(GroupSize, 6190 2, 32, 6191 "group size must be in the interval [2,32]", 6192 Loc)) { 6193 return false; 6194 } 6195 if (!isPowerOf2_64(GroupSize)) { 6196 Error(Loc, "group size must be a power of two"); 6197 return false; 6198 } 6199 if (parseSwizzleOperand(LaneIdx, 6200 0, GroupSize - 1, 6201 "lane id must be in the interval [0,group size - 1]", 6202 Loc)) { 6203 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6204 return true; 6205 } 6206 return false; 6207 } 6208 6209 bool 6210 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6211 using namespace llvm::AMDGPU::Swizzle; 6212 6213 SMLoc Loc; 6214 int64_t GroupSize; 6215 6216 if (!parseSwizzleOperand(GroupSize, 6217 2, 32, 6218 "group size must be in the interval [2,32]", 6219 Loc)) { 6220 return false; 6221 } 6222 if (!isPowerOf2_64(GroupSize)) { 6223 Error(Loc, "group size must be a power of two"); 6224 return false; 6225 } 6226 6227 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6228 return true; 6229 } 6230 6231 bool 6232 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6233 using namespace llvm::AMDGPU::Swizzle; 6234 6235 SMLoc Loc; 6236 int64_t GroupSize; 6237 6238 if (!parseSwizzleOperand(GroupSize, 6239 1, 16, 6240 "group size must be in the interval [1,16]", 6241 Loc)) { 6242 return false; 6243 } 6244 if (!isPowerOf2_64(GroupSize)) { 6245 Error(Loc, "group size must be a power of two"); 6246 return false; 6247 } 6248 6249 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6250 return true; 6251 } 6252 6253 bool 6254 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6255 using namespace llvm::AMDGPU::Swizzle; 6256 6257 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6258 return false; 6259 } 6260 6261 StringRef Ctl; 6262 SMLoc StrLoc = getLoc(); 6263 if (!parseString(Ctl)) { 6264 return false; 6265 } 6266 if (Ctl.size() != BITMASK_WIDTH) { 6267 Error(StrLoc, "expected a 5-character mask"); 6268 return false; 6269 } 6270 6271 unsigned AndMask = 0; 6272 unsigned OrMask = 0; 6273 unsigned XorMask = 0; 6274 6275 for (size_t i = 0; i < Ctl.size(); ++i) { 6276 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6277 switch(Ctl[i]) { 6278 default: 6279 Error(StrLoc, "invalid mask"); 6280 return false; 6281 case '0': 6282 break; 6283 case '1': 6284 OrMask |= Mask; 6285 break; 6286 case 'p': 6287 AndMask |= Mask; 6288 break; 6289 case 'i': 6290 AndMask |= Mask; 6291 XorMask |= Mask; 6292 break; 6293 } 6294 } 6295 6296 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6297 return true; 6298 } 6299 6300 bool 6301 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6302 6303 SMLoc OffsetLoc = getLoc(); 6304 6305 if (!parseExpr(Imm, "a swizzle macro")) { 6306 return false; 6307 } 6308 if (!isUInt<16>(Imm)) { 6309 Error(OffsetLoc, "expected a 16-bit offset"); 6310 return false; 6311 } 6312 return true; 6313 } 6314 6315 bool 6316 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6317 using namespace llvm::AMDGPU::Swizzle; 6318 6319 if 
(skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6320 6321 SMLoc ModeLoc = getLoc(); 6322 bool Ok = false; 6323 6324 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6325 Ok = parseSwizzleQuadPerm(Imm); 6326 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6327 Ok = parseSwizzleBitmaskPerm(Imm); 6328 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6329 Ok = parseSwizzleBroadcast(Imm); 6330 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6331 Ok = parseSwizzleSwap(Imm); 6332 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6333 Ok = parseSwizzleReverse(Imm); 6334 } else { 6335 Error(ModeLoc, "expected a swizzle mode"); 6336 } 6337 6338 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6339 } 6340 6341 return false; 6342 } 6343 6344 OperandMatchResultTy 6345 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6346 SMLoc S = getLoc(); 6347 int64_t Imm = 0; 6348 6349 if (trySkipId("offset")) { 6350 6351 bool Ok = false; 6352 if (skipToken(AsmToken::Colon, "expected a colon")) { 6353 if (trySkipId("swizzle")) { 6354 Ok = parseSwizzleMacro(Imm); 6355 } else { 6356 Ok = parseSwizzleOffset(Imm); 6357 } 6358 } 6359 6360 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6361 6362 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6363 } else { 6364 // Swizzle "offset" operand is optional. 6365 // If it is omitted, try parsing other optional operands. 6366 return parseOptionalOpr(Operands); 6367 } 6368 } 6369 6370 bool 6371 AMDGPUOperand::isSwizzle() const { 6372 return isImmTy(ImmTySwizzle); 6373 } 6374 6375 //===----------------------------------------------------------------------===// 6376 // VGPR Index Mode 6377 //===----------------------------------------------------------------------===// 6378 6379 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6380 6381 using namespace llvm::AMDGPU::VGPRIndexMode; 6382 6383 if (trySkipToken(AsmToken::RParen)) { 6384 return OFF; 6385 } 6386 6387 int64_t Imm = 0; 6388 6389 while (true) { 6390 unsigned Mode = 0; 6391 SMLoc S = getLoc(); 6392 6393 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6394 if (trySkipId(IdSymbolic[ModeId])) { 6395 Mode = 1 << ModeId; 6396 break; 6397 } 6398 } 6399 6400 if (Mode == 0) { 6401 Error(S, (Imm == 0)?
6402 "expected a VGPR index mode or a closing parenthesis" : 6403 "expected a VGPR index mode"); 6404 return UNDEF; 6405 } 6406 6407 if (Imm & Mode) { 6408 Error(S, "duplicate VGPR index mode"); 6409 return UNDEF; 6410 } 6411 Imm |= Mode; 6412 6413 if (trySkipToken(AsmToken::RParen)) 6414 break; 6415 if (!skipToken(AsmToken::Comma, 6416 "expected a comma or a closing parenthesis")) 6417 return UNDEF; 6418 } 6419 6420 return Imm; 6421 } 6422 6423 OperandMatchResultTy 6424 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6425 6426 using namespace llvm::AMDGPU::VGPRIndexMode; 6427 6428 int64_t Imm = 0; 6429 SMLoc S = getLoc(); 6430 6431 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6432 Imm = parseGPRIdxMacro(); 6433 if (Imm == UNDEF) 6434 return MatchOperand_ParseFail; 6435 } else { 6436 if (getParser().parseAbsoluteExpression(Imm)) 6437 return MatchOperand_ParseFail; 6438 if (Imm < 0 || !isUInt<4>(Imm)) { 6439 Error(S, "invalid immediate: only 4-bit values are legal"); 6440 return MatchOperand_ParseFail; 6441 } 6442 } 6443 6444 Operands.push_back( 6445 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6446 return MatchOperand_Success; 6447 } 6448 6449 bool AMDGPUOperand::isGPRIdxMode() const { 6450 return isImmTy(ImmTyGprIdxMode); 6451 } 6452 6453 //===----------------------------------------------------------------------===// 6454 // sopp branch targets 6455 //===----------------------------------------------------------------------===// 6456 6457 OperandMatchResultTy 6458 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6459 6460 // Make sure we are not parsing something 6461 // that looks like a label or an expression but is not. 6462 // This will improve error messages. 6463 if (isRegister() || isModifier()) 6464 return MatchOperand_NoMatch; 6465 6466 if (!parseExpr(Operands)) 6467 return MatchOperand_ParseFail; 6468 6469 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6470 assert(Opr.isImm() || Opr.isExpr()); 6471 SMLoc Loc = Opr.getStartLoc(); 6472 6473 // Currently we do not support arbitrary expressions as branch targets. 6474 // Only labels and absolute expressions are accepted. 
6475 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6476 Error(Loc, "expected an absolute expression or a label"); 6477 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6478 Error(Loc, "expected a 16-bit signed jump offset"); 6479 } 6480 6481 return MatchOperand_Success; 6482 } 6483 6484 //===----------------------------------------------------------------------===// 6485 // Boolean holding registers 6486 //===----------------------------------------------------------------------===// 6487 6488 OperandMatchResultTy 6489 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6490 return parseReg(Operands); 6491 } 6492 6493 //===----------------------------------------------------------------------===// 6494 // mubuf 6495 //===----------------------------------------------------------------------===// 6496 6497 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6498 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6499 } 6500 6501 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6502 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6503 } 6504 6505 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6506 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6507 } 6508 6509 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6510 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6511 } 6512 6513 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6514 const OperandVector &Operands, 6515 bool IsAtomic, 6516 bool IsAtomicReturn, 6517 bool IsLds) { 6518 bool IsLdsOpcode = IsLds; 6519 bool HasLdsModifier = false; 6520 OptionalImmIndexMap OptionalIdx; 6521 assert(IsAtomicReturn ? IsAtomic : true); 6522 unsigned FirstOperandIdx = 1; 6523 6524 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6525 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6526 6527 // Add the register arguments 6528 if (Op.isReg()) { 6529 Op.addRegOperands(Inst, 1); 6530 // Insert a tied src for atomic return dst. 6531 // This cannot be postponed as subsequent calls to 6532 // addImmOperands rely on correct number of MC operands. 6533 if (IsAtomicReturn && i == FirstOperandIdx) 6534 Op.addRegOperands(Inst, 1); 6535 continue; 6536 } 6537 6538 // Handle the case where soffset is an immediate 6539 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6540 Op.addImmOperands(Inst, 1); 6541 continue; 6542 } 6543 6544 HasLdsModifier |= Op.isLDS(); 6545 6546 // Handle tokens like 'offen' which are sometimes hard-coded into the 6547 // asm string. There are no MCInst operands for these. 6548 if (Op.isToken()) { 6549 continue; 6550 } 6551 assert(Op.isImm()); 6552 6553 // Handle optional arguments 6554 OptionalIdx[Op.getImmTy()] = i; 6555 } 6556 6557 // This is a workaround for an llvm quirk which may result in an 6558 // incorrect instruction selection. Lds and non-lds versions of 6559 // MUBUF instructions are identical except that lds versions 6560 // have mandatory 'lds' modifier. However this modifier follows 6561 // optional modifiers and llvm asm matcher regards this 'lds' 6562 // modifier as an optional one. As a result, an lds version 6563 // of opcode may be selected even if it has no 'lds' modifier. 6564 if (IsLdsOpcode && !HasLdsModifier) { 6565 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6566 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6567 Inst.setOpcode(NoLdsOpcode); 6568 IsLdsOpcode = false; 6569 } 6570 } 6571 6572 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6573 if (!IsAtomic || IsAtomicReturn) { 6574 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6575 IsAtomicReturn ? -1 : 0); 6576 } 6577 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6578 6579 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6580 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6581 } 6582 6583 if (isGFX10Plus()) 6584 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6585 } 6586 6587 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6588 OptionalImmIndexMap OptionalIdx; 6589 6590 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6591 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6592 6593 // Add the register arguments 6594 if (Op.isReg()) { 6595 Op.addRegOperands(Inst, 1); 6596 continue; 6597 } 6598 6599 // Handle the case where soffset is an immediate 6600 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6601 Op.addImmOperands(Inst, 1); 6602 continue; 6603 } 6604 6605 // Handle tokens like 'offen' which are sometimes hard-coded into the 6606 // asm string. There are no MCInst operands for these. 6607 if (Op.isToken()) { 6608 continue; 6609 } 6610 assert(Op.isImm()); 6611 6612 // Handle optional arguments 6613 OptionalIdx[Op.getImmTy()] = i; 6614 } 6615 6616 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6617 AMDGPUOperand::ImmTyOffset); 6618 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6619 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6620 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6621 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6622 6623 if (isGFX10Plus()) 6624 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6625 } 6626 6627 //===----------------------------------------------------------------------===// 6628 // mimg 6629 //===----------------------------------------------------------------------===// 6630 6631 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6632 bool IsAtomic) { 6633 unsigned I = 1; 6634 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6635 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6636 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6637 } 6638 6639 if (IsAtomic) { 6640 // Add src, same as dst 6641 assert(Desc.getNumDefs() == 1); 6642 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6643 } 6644 6645 OptionalImmIndexMap OptionalIdx; 6646 6647 for (unsigned E = Operands.size(); I != E; ++I) { 6648 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6649 6650 // Add the register arguments 6651 if (Op.isReg()) { 6652 Op.addRegOperands(Inst, 1); 6653 } else if (Op.isImmModifier()) { 6654 OptionalIdx[Op.getImmTy()] = I; 6655 } else if (!Op.isToken()) { 6656 llvm_unreachable("unexpected operand type"); 6657 } 6658 } 6659 6660 bool IsGFX10Plus = isGFX10Plus(); 6661 6662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6663 if (IsGFX10Plus) 6664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6665 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6666 if (IsGFX10Plus) 6667 addOptionalImmOperand(Inst, 
Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6670 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6671 if (IsGFX10Plus) 6672 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6673 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6674 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6675 if (!IsGFX10Plus) 6676 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6677 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6678 } 6679 6680 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6681 cvtMIMG(Inst, Operands, true); 6682 } 6683 6684 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6685 const OperandVector &Operands) { 6686 for (unsigned I = 1; I < Operands.size(); ++I) { 6687 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6688 if (Operand.isReg()) 6689 Operand.addRegOperands(Inst, 1); 6690 } 6691 6692 Inst.addOperand(MCOperand::createImm(1)); // a16 6693 } 6694 6695 //===----------------------------------------------------------------------===// 6696 // smrd 6697 //===----------------------------------------------------------------------===// 6698 6699 bool AMDGPUOperand::isSMRDOffset8() const { 6700 return isImm() && isUInt<8>(getImm()); 6701 } 6702 6703 bool AMDGPUOperand::isSMEMOffset() const { 6704 return isImm(); // Offset range is checked later by validator. 6705 } 6706 6707 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6708 // 32-bit literals are only supported on CI and we only want to use them 6709 // when the offset is > 8-bits. 6710 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6711 } 6712 6713 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6714 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6715 } 6716 6717 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6718 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6719 } 6720 6721 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6722 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6723 } 6724 6725 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6726 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6727 } 6728 6729 //===----------------------------------------------------------------------===// 6730 // vop3 6731 //===----------------------------------------------------------------------===// 6732 6733 static bool ConvertOmodMul(int64_t &Mul) { 6734 if (Mul != 1 && Mul != 2 && Mul != 4) 6735 return false; 6736 6737 Mul >>= 1; 6738 return true; 6739 } 6740 6741 static bool ConvertOmodDiv(int64_t &Div) { 6742 if (Div == 1) { 6743 Div = 0; 6744 return true; 6745 } 6746 6747 if (Div == 2) { 6748 Div = 3; 6749 return true; 6750 } 6751 6752 return false; 6753 } 6754 6755 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6756 if (BoundCtrl == 0) { 6757 BoundCtrl = 1; 6758 return true; 6759 } 6760 6761 if (BoundCtrl == -1) { 6762 BoundCtrl = 0; 6763 return true; 6764 } 6765 6766 return false; 6767 } 6768 6769 // Note: the order in this table matches the order of operands in AsmString. 
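// For instance (illustrative), the "offset" entry below lets the generated
// parser accept an optional "offset:<imm>" modifier and record it as
// AMDGPUOperand::ImmTyOffset.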
6770 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6771 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6772 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6773 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6774 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6775 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6776 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6777 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6778 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6779 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6780 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6781 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6782 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6783 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6784 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6785 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6786 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6787 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6788 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6789 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6790 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6791 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6792 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6793 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6794 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6795 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6796 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6797 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6798 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6799 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6800 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6801 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6802 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6803 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6804 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6805 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6806 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6807 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6808 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6809 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6810 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6811 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6812 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6813 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6814 }; 6815 6816 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6817 6818 OperandMatchResultTy res = parseOptionalOpr(Operands); 6819 6820 // This is a hack to enable hardcoded mandatory operands which follow 6821 // optional operands. 6822 // 6823 // Current design assumes that all operands after the first optional operand 6824 // are also optional. However implementation of some instructions violates 6825 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6826 // 6827 // To alleviate this problem, we have to (implicitly) parse extra operands 6828 // to make sure autogenerated parser of custom operands never hit hardcoded 6829 // mandatory operands. 
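// An illustrative example (the exact syntax is an assumption, not taken from a
// test): a returning FLAT/GLOBAL atomic such as
//   global_atomic_add v1, v[0:1], v2, off glc
// requires the trailing "glc" even though "glc" is listed as optional above.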
6830 6831 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6832 if (res != MatchOperand_Success || 6833 isToken(AsmToken::EndOfStatement)) 6834 break; 6835 6836 trySkipToken(AsmToken::Comma); 6837 res = parseOptionalOpr(Operands); 6838 } 6839 6840 return res; 6841 } 6842 6843 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6844 OperandMatchResultTy res; 6845 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6846 // try to parse any optional operand here 6847 if (Op.IsBit) { 6848 res = parseNamedBit(Op.Name, Operands, Op.Type); 6849 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6850 res = parseOModOperand(Operands); 6851 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6852 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6853 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6854 res = parseSDWASel(Operands, Op.Name, Op.Type); 6855 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6856 res = parseSDWADstUnused(Operands); 6857 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6858 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6859 Op.Type == AMDGPUOperand::ImmTyNegLo || 6860 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6861 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6862 Op.ConvertResult); 6863 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6864 res = parseDim(Operands); 6865 } else { 6866 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6867 } 6868 if (res != MatchOperand_NoMatch) { 6869 return res; 6870 } 6871 } 6872 return MatchOperand_NoMatch; 6873 } 6874 6875 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6876 StringRef Name = getTokenStr(); 6877 if (Name == "mul") { 6878 return parseIntWithPrefix("mul", Operands, 6879 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6880 } 6881 6882 if (Name == "div") { 6883 return parseIntWithPrefix("div", Operands, 6884 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6885 } 6886 6887 return MatchOperand_NoMatch; 6888 } 6889 6890 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6891 cvtVOP3P(Inst, Operands); 6892 6893 int Opc = Inst.getOpcode(); 6894 6895 int SrcNum; 6896 const int Ops[] = { AMDGPU::OpName::src0, 6897 AMDGPU::OpName::src1, 6898 AMDGPU::OpName::src2 }; 6899 for (SrcNum = 0; 6900 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6901 ++SrcNum); 6902 assert(SrcNum > 0); 6903 6904 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6905 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6906 6907 if ((OpSel & (1 << SrcNum)) != 0) { 6908 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6909 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6910 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6911 } 6912 } 6913 6914 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6915 // 1. This operand is input modifiers 6916 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6917 // 2. This is not last operand 6918 && Desc.NumOperands > (OpNum + 1) 6919 // 3. Next operand is register class 6920 && Desc.OpInfo[OpNum + 1].RegClass != -1 6921 // 4. 
Next register is not tied to any other operand 6922 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6923 } 6924 6925 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6926 { 6927 OptionalImmIndexMap OptionalIdx; 6928 unsigned Opc = Inst.getOpcode(); 6929 6930 unsigned I = 1; 6931 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6932 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6933 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6934 } 6935 6936 for (unsigned E = Operands.size(); I != E; ++I) { 6937 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6938 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6939 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6940 } else if (Op.isInterpSlot() || 6941 Op.isInterpAttr() || 6942 Op.isAttrChan()) { 6943 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6944 } else if (Op.isImmModifier()) { 6945 OptionalIdx[Op.getImmTy()] = I; 6946 } else { 6947 llvm_unreachable("unhandled operand type"); 6948 } 6949 } 6950 6951 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6952 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6953 } 6954 6955 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6956 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6957 } 6958 6959 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6960 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6961 } 6962 } 6963 6964 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6965 OptionalImmIndexMap &OptionalIdx) { 6966 unsigned Opc = Inst.getOpcode(); 6967 6968 unsigned I = 1; 6969 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6970 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6971 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6972 } 6973 6974 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6975 // This instruction has src modifiers 6976 for (unsigned E = Operands.size(); I != E; ++I) { 6977 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6978 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6979 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6980 } else if (Op.isImmModifier()) { 6981 OptionalIdx[Op.getImmTy()] = I; 6982 } else if (Op.isRegOrImm()) { 6983 Op.addRegOrImmOperands(Inst, 1); 6984 } else { 6985 llvm_unreachable("unhandled operand type"); 6986 } 6987 } 6988 } else { 6989 // No src modifiers 6990 for (unsigned E = Operands.size(); I != E; ++I) { 6991 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6992 if (Op.isMod()) { 6993 OptionalIdx[Op.getImmTy()] = I; 6994 } else { 6995 Op.addRegOrImmOperands(Inst, 1); 6996 } 6997 } 6998 } 6999 7000 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7002 } 7003 7004 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7005 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7006 } 7007 7008 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7009 // it has src2 register operand that is tied to dst operand 7010 // we don't allow modifiers for this operand in assembler so src2_modifiers 7011 // should be 0. 
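// (Illustrative example of such an opcode: "v_mac_f32 v0, v1, v2", where the
// dst register v0 also acts as the implied, tied src2.)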
7012 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7013 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7014 Opc == AMDGPU::V_MAC_F32_e64_vi || 7015 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7016 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7017 Opc == AMDGPU::V_MAC_F16_e64_vi || 7018 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7019 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7020 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7021 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7022 auto it = Inst.begin(); 7023 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7024 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7025 ++it; 7026 // Copy the operand to ensure it's not invalidated when Inst grows. 7027 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7028 } 7029 } 7030 7031 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7032 OptionalImmIndexMap OptionalIdx; 7033 cvtVOP3(Inst, Operands, OptionalIdx); 7034 } 7035 7036 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7037 const OperandVector &Operands) { 7038 OptionalImmIndexMap OptIdx; 7039 const int Opc = Inst.getOpcode(); 7040 const MCInstrDesc &Desc = MII.get(Opc); 7041 7042 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7043 7044 cvtVOP3(Inst, Operands, OptIdx); 7045 7046 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7047 assert(!IsPacked); 7048 Inst.addOperand(Inst.getOperand(0)); 7049 } 7050 7051 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7052 // instruction, and then figure out where to actually put the modifiers 7053 7054 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7055 7056 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7057 if (OpSelHiIdx != -1) { 7058 int DefaultVal = IsPacked ? 
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}
bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    if (getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (!isToken(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getTokenStr();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
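    // Each of the four selectors is a 2-bit lane index within a group of four
    // lanes; e.g. quad_perm:[0,1,2,3] (illustrative) is the identity
    // permutation.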
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15) ? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
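// Convert parsed operands into a DPP (or DPP8) MCInst. An illustrative DPP
// form is "v_mov_b32_dpp v0, v1 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf";
// omitted row_mask/bank_mask/bound_ctrl/fi operands fall back to the defaults
// returned by the helpers above.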
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) DPP uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}
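// Illustrative SDWA syntax handled by parseSDWASel above and
// parseSDWADstUnused below:
//   v_mov_b32_sdwa v0, v1 dst_sel:BYTE_0 dst_unused:UNUSED_PRESERVE src0_sel:DWORD
// Omitted selectors fall back to the DWORD / UNUSED_PRESERVE defaults added in
// cvtSDWA.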
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) SDWA uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
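      // At this point Inst holds 1 operand (just the dst) when the skipped
      // vcc is the VOP2b dst, and 5 operands (dst plus two modifier/src
      // pairs) when it is the trailing carry-in source, which is what the
      // operand counts below distinguish.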
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks if we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and if the
    // name of the expression is not a valid token the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }