//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Broad classification of a parsed register operand.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand: a token, an immediate, a
/// register, or an expression. The predicate methods below (isVSrcB32,
/// isSCSrcF16, ...) are queried by the generated asm matcher to classify
/// operands against instruction operand classes.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  // Parser that created this operand; used by out-of-line predicates.
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  /// Source-operand modifiers (abs/neg are FP modifiers, sext is the sole
  /// integer modifier); encoded into an SISrcMods immediate.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode abs/neg into the SISrcMods bitfield.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode sext into the SISrcMods bitfield.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // FP and int modifiers are mutually exclusive on an operand.
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  /// What an immediate operand *means* (which instruction field it fills),
  /// e.g. a DS offset vs. a DPP row mask. ImmTyNone is a plain value.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  /// How an immediate will be encoded: as an inline constant, as a literal,
  /// or not yet decided.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  // Token payload: a non-owning view into the parsed text.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Immediate payload. Kind is mutable so the const setImmKind* helpers
  // can record the chosen encoding during matching.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  // Register payload.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Payload storage discriminated by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret is a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalized "Is" is inconsistent with isImmKindConst()
  // below; kept as-is since renaming would break callers.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the matcher: register kind with no abs/neg/sext.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // Any VGPR class, of any width.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // Named-immediate predicates, one per ImmTy, used by the generated matcher.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  // "CPol_GLC1" is a MatchClass of the CPOL_GLC1 operand with the default and
  // forced value of the GLC operand.
  bool isCPol_GLC1() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // SCSrc*: SGPR or inline constant, no modifiers.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  // SSrc*: SCSrc or a literal (or relocatable expression for 32-bit).
  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VCSrc*: VGPR/SGPR or inline constant, no modifiers.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  // VSrc*: VCSrc or a literal (or relocatable expression for 32-bit).
  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  // VISrc*: VGPR-only or inline constant, no modifiers (widths in the name).
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }

  // AISrc*: AGPR or inline constant, no modifiers (widths in the name).
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  // KImm*: literal-only immediates.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // Only valid for symbol-ref expressions (see isToken()).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    // An expression accepted as a token yields its symbol name.
    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live on registers and on plain (ImmTyNone) immediates only.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifiers immediate first, then the reg/imm operand; any
  // modifiers on an immediate are folded into the value itself elsewhere,
  // hence ApplyModifiers == false here.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  /// Debug helper: print the symbolic name of an ImmTy.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers; each fully initializes the corresponding union member.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Kind = ImmKindTyNone;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE(review): HasExplicitEncodingSize is not used in the visible body.
  // The token keeps a non-owning pointer into Str's storage.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

// Debug printing for operand modifiers.
raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
/// Tracks the highest SGPR/VGPR index referenced in the current kernel scope
/// and mirrors the resulting register counts into the MC symbols
/// ".kernel.sgpr_count" / ".kernel.vgpr_count".
class KernelScopeInfo {
  // One past the highest SGPR/VGPR index seen so far (-1 => none yet).
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record that SGPR index `i` is used; bump the count and refresh the
  // ".kernel.sgpr_count" symbol when it grows.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Same as usesSgprAt, for VGPRs and ".kernel.vgpr_count".
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset both counters to zero (passing -1 trips the `>=` test, setting the
  // minimum to 0) and (re)define the count symbols in the new context.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // Note a register use spanning [DwordRegIndex, DwordRegIndex + RegWidth).
  // AGPRs are counted against the VGPR budget; TTMP/special regs are ignored.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
1171 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1172 1173 unsigned ForcedEncodingSize = 0; 1174 bool ForcedDPP = false; 1175 bool ForcedSDWA = false; 1176 KernelScopeInfo KernelScope; 1177 unsigned CPolSeen; 1178 1179 /// @name Auto-generated Match Functions 1180 /// { 1181 1182 #define GET_ASSEMBLER_HEADER 1183 #include "AMDGPUGenAsmMatcher.inc" 1184 1185 /// } 1186 1187 private: 1188 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1189 bool OutOfRangeError(SMRange Range); 1190 /// Calculate VGPR/SGPR blocks required for given target, reserved 1191 /// registers, and user-specified NextFreeXGPR values. 1192 /// 1193 /// \param Features [in] Target features, used for bug corrections. 1194 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1195 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1196 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1197 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1198 /// descriptor field, if valid. 1199 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1200 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1201 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1202 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1203 /// \param VGPRBlocks [out] Result VGPR block count. 1204 /// \param SGPRBlocks [out] Result SGPR block count. 
1205 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1206 bool FlatScrUsed, bool XNACKUsed, 1207 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1208 SMRange VGPRRange, unsigned NextFreeSGPR, 1209 SMRange SGPRRange, unsigned &VGPRBlocks, 1210 unsigned &SGPRBlocks); 1211 bool ParseDirectiveAMDGCNTarget(); 1212 bool ParseDirectiveAMDHSAKernel(); 1213 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1214 bool ParseDirectiveHSACodeObjectVersion(); 1215 bool ParseDirectiveHSACodeObjectISA(); 1216 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1217 bool ParseDirectiveAMDKernelCodeT(); 1218 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1219 bool ParseDirectiveAMDGPUHsaKernel(); 1220 1221 bool ParseDirectiveISAVersion(); 1222 bool ParseDirectiveHSAMetadata(); 1223 bool ParseDirectivePALMetadataBegin(); 1224 bool ParseDirectivePALMetadata(); 1225 bool ParseDirectiveAMDGPULDS(); 1226 1227 /// Common code to parse out a block of text (typically YAML) between start and 1228 /// end directives. 
1229 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1230 const char *AssemblerDirectiveEnd, 1231 std::string &CollectString); 1232 1233 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1234 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1235 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1236 unsigned &RegNum, unsigned &RegWidth, 1237 bool RestoreOnFailure = false); 1238 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1239 unsigned &RegNum, unsigned &RegWidth, 1240 SmallVectorImpl<AsmToken> &Tokens); 1241 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1242 unsigned &RegWidth, 1243 SmallVectorImpl<AsmToken> &Tokens); 1244 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1245 unsigned &RegWidth, 1246 SmallVectorImpl<AsmToken> &Tokens); 1247 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1248 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1249 bool ParseRegRange(unsigned& Num, unsigned& Width); 1250 unsigned getRegularReg(RegisterKind RegKind, 1251 unsigned RegNum, 1252 unsigned RegWidth, 1253 SMLoc Loc); 1254 1255 bool isRegister(); 1256 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1257 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1258 void initializeGprCountSymbol(RegisterKind RegKind); 1259 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1260 unsigned RegWidth); 1261 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1262 bool IsAtomic, bool IsLds = false); 1263 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1264 bool IsGdsHardcoded); 1265 1266 public: 1267 enum AMDGPUMatchResultTy { 1268 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1269 }; 1270 enum OperandMode { 1271 OperandMode_Default, 1272 OperandMode_NSA, 1273 }; 1274 1275 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1276 1277 AMDGPUAsmParser(const MCSubtargetInfo 
&STI, MCAsmParser &_Parser, 1278 const MCInstrInfo &MII, 1279 const MCTargetOptions &Options) 1280 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1281 MCAsmParserExtension::Initialize(Parser); 1282 1283 if (getFeatureBits().none()) { 1284 // Set default features. 1285 copySTI().ToggleFeature("southern-islands"); 1286 } 1287 1288 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1289 1290 { 1291 // TODO: make those pre-defined variables read-only. 1292 // Currently there is none suitable machinery in the core llvm-mc for this. 1293 // MCSymbol::isRedefinable is intended for another purpose, and 1294 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1295 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1296 MCContext &Ctx = getContext(); 1297 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1298 MCSymbol *Sym = 1299 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1300 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1301 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1302 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1303 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1304 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1305 } else { 1306 MCSymbol *Sym = 1307 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1313 } 1314 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1315 initializeGprCountSymbol(IS_VGPR); 1316 initializeGprCountSymbol(IS_SGPR); 1317 } else 1318 KernelScope.initialize(getContext()); 1319 } 1320 } 
1321 1322 bool hasXNACK() const { 1323 return AMDGPU::hasXNACK(getSTI()); 1324 } 1325 1326 bool hasMIMG_R128() const { 1327 return AMDGPU::hasMIMG_R128(getSTI()); 1328 } 1329 1330 bool hasPackedD16() const { 1331 return AMDGPU::hasPackedD16(getSTI()); 1332 } 1333 1334 bool hasGFX10A16() const { 1335 return AMDGPU::hasGFX10A16(getSTI()); 1336 } 1337 1338 bool isSI() const { 1339 return AMDGPU::isSI(getSTI()); 1340 } 1341 1342 bool isCI() const { 1343 return AMDGPU::isCI(getSTI()); 1344 } 1345 1346 bool isVI() const { 1347 return AMDGPU::isVI(getSTI()); 1348 } 1349 1350 bool isGFX9() const { 1351 return AMDGPU::isGFX9(getSTI()); 1352 } 1353 1354 bool isGFX90A() const { 1355 return AMDGPU::isGFX90A(getSTI()); 1356 } 1357 1358 bool isGFX9Plus() const { 1359 return AMDGPU::isGFX9Plus(getSTI()); 1360 } 1361 1362 bool isGFX10() const { 1363 return AMDGPU::isGFX10(getSTI()); 1364 } 1365 1366 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1367 1368 bool isGFX10_BEncoding() const { 1369 return AMDGPU::isGFX10_BEncoding(getSTI()); 1370 } 1371 1372 bool hasInv2PiInlineImm() const { 1373 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1374 } 1375 1376 bool hasFlatOffsets() const { 1377 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1378 } 1379 1380 bool hasSGPR102_SGPR103() const { 1381 return !isVI() && !isGFX9(); 1382 } 1383 1384 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1385 1386 bool hasIntClamp() const { 1387 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1388 } 1389 1390 AMDGPUTargetStreamer &getTargetStreamer() { 1391 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1392 return static_cast<AMDGPUTargetStreamer &>(TS); 1393 } 1394 1395 const MCRegisterInfo *getMRI() const { 1396 // We need this const_cast because for some reason getContext() is not const 1397 // in MCAsmParser. 
1398 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1399 } 1400 1401 const MCInstrInfo *getMII() const { 1402 return &MII; 1403 } 1404 1405 const FeatureBitset &getFeatureBits() const { 1406 return getSTI().getFeatureBits(); 1407 } 1408 1409 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1410 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1411 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1412 1413 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1414 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1415 bool isForcedDPP() const { return ForcedDPP; } 1416 bool isForcedSDWA() const { return ForcedSDWA; } 1417 ArrayRef<unsigned> getMatchedVariants() const; 1418 StringRef getMatchedVariantName() const; 1419 1420 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1421 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1422 bool RestoreOnFailure); 1423 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1424 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1425 SMLoc &EndLoc) override; 1426 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1427 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1428 unsigned Kind) override; 1429 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1430 OperandVector &Operands, MCStreamer &Out, 1431 uint64_t &ErrorInfo, 1432 bool MatchingInlineAsm) override; 1433 bool ParseDirective(AsmToken DirectiveID) override; 1434 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1435 OperandMode Mode = OperandMode_Default); 1436 StringRef parseMnemonicSuffix(StringRef Name); 1437 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1438 SMLoc NameLoc, OperandVector &Operands) override; 1439 //bool ProcessInstruction(MCInst &Inst); 1440 1441 OperandMatchResultTy 
parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(StringRef Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseCPol(OperandVector &Operands);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value,
                                             SMLoc &StringLoc);

  // Token-classification helpers used while deciding how to parse an operand.
  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  // MTBUF data/numeric format parsing.
  OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
  OperandMatchResultTy parseUfmt(int64_t &Format);
  OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
  OperandMatchResultTy parseFORMAT(OperandVector &Operands);
  OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
  OperandMatchResultTy parseNumericFormat(int64_t &Format);
  bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
  bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);

  // DS / export instruction operand conversion.
  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // One parsed field of a compound operand (e.g. hwreg() or sendmsg()):
  // its location, numeric id, and whether it was spelled symbolically /
  // explicitly provided.
  struct OperandInfoTy {
    SMLoc Loc;
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream);

  bool parseHwregBody(OperandInfoTy &HwReg,
                      OperandInfoTy &Offset,
                      OperandInfoTy &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const OperandInfoTy &Offset,
                     const OperandInfoTy &Width);

  // Source-location lookup helpers used to attach diagnostics to the
  // offending operand.
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
  SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;

  SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
                      const OperandVector &Operands) const;
  SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
  SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
  SMLoc getLitLoc(const OperandVector &Operands) const;
  SMLoc getConstLoc(const OperandVector &Operands) const;

  // Post-match instruction validation (constraints the matcher cannot encode).
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateMIMGMSAA(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
  bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
  bool validateAGPRLdSt(const MCInst &Inst) const;
  bool validateVGPRAlign(const MCInst &Inst) const;
  bool validateDivScale(const MCInst &Inst);
  bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
                             const SMLoc &IDLoc);
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS);
  bool isSupportedMnemo(StringRef Mnemo,
                        const FeatureBitset &FBS,
                        ArrayRef<unsigned> Variants);
  bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);

  // Low-level token stream helpers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Pref, const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  bool parseId(StringRef &Val, const StringRef ErrMsg = "");

  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm, StringRef Expected = "");
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  // swizzle() macro parsing.
  bool parseSwizzleOperand(int64_t &Op,
                           const unsigned MinVal,
                           const unsigned MaxVal,
                           const StringRef ErrMsg,
                           SMLoc &Loc);
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF/MTBUF operand conversion.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Default (omitted) operand factories.
  AMDGPUOperand::Ptr defaultCPol() const;
  AMDGPUOperand::Ptr defaultCPol_GLC1() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMEMOffset() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  // VOP3 / VOP3P operand conversion.
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  // MIMG / SMEM operand conversion.
  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
  void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);

  void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);

  // DPP parsing and conversion.
  bool parseDimId(unsigned &Encoding);
  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
  int64_t parseDPPCtrlSel(StringRef Ctrl);
  int64_t parseDPPCtrlPerm();
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  // SDWA parsing and conversion.
  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType,
               bool SkipDstVcc = false,
               bool SkipSrcVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

// Table entry describing one optional instruction operand and how to
// convert its parsed value.
struct OptionalOperand {
  const
char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// Map an operand byte size (2/4/8) to its IEEE float semantics.
// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Map an AMDGPU operand-type enumerator to the IEEE semantics of the FP
// encoding used for that operand width (32/64/16-bit families).
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

// Returns true if FPLiteral can be converted to VT's semantics with at
// worst a precision loss (overflow/underflow are rejected).
// NOTE(review): converts \p FPLiteral in place as a side effect.
static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// True if Val fits in Size bits interpreted as either unsigned or signed.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

// Decide whether this immediate can be encoded as a hardware inline
// constant for an operand of machine type \p type.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

// Decide whether this immediate may be encoded as a 32-bit literal
// constant for an operand of machine type \p type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslesly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

// True if this operand is a register belonging to register class \p RCID.
bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}

// SDWA source operands: VGPR-only on VI; VGPR/SGPR or an inline constant
// on GFX9+; SDWA does not exist on earlier targets.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9Plus())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

// A boolean (condition) register: width must match the wavefront size.
bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

// Apply abs/neg FP input modifiers to raw bits \p Val of an operand that
// is \p Size bytes wide: abs clears the sign bit, neg then flips it.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

// Append this immediate to \p Inst, encoding it as an SI source literal
// when the target operand accepts one, otherwise as a plain immediate.
void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // NOTE(review): bitwise '&' between the two bools below; given &'s
    // higher precedence this groups as (ApplyModifiers & isImmTy(...)) &&
    // hasFPModifiers(), which for bool values equals the && reading —
    // confirm this was intentional.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
    setImmKindNone();
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t
Val, bool ApplyModifiers) const { 1965 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1966 auto OpNum = Inst.getNumOperands(); 1967 // Check that this operand accepts literals 1968 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1969 1970 if (ApplyModifiers) { 1971 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1972 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1973 Val = applyInputFPModifiers(Val, Size); 1974 } 1975 1976 APInt Literal(64, Val); 1977 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1978 1979 if (Imm.IsFPImm) { // We got fp literal token 1980 switch (OpTy) { 1981 case AMDGPU::OPERAND_REG_IMM_INT64: 1982 case AMDGPU::OPERAND_REG_IMM_FP64: 1983 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1984 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1985 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1986 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1987 AsmParser->hasInv2PiInlineImm())) { 1988 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1989 setImmKindConst(); 1990 return; 1991 } 1992 1993 // Non-inlineable 1994 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1995 // For fp operands we check if low 32 bits are zeros 1996 if (Literal.getLoBits(32) != 0) { 1997 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1998 "Can't encode literal as exact 64-bit floating-point operand. " 1999 "Low 32-bits will be set to zero"); 2000 } 2001 2002 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2003 setImmKindLiteral(); 2004 return; 2005 } 2006 2007 // We don't allow fp literals in 64-bit integer instructions. It is 2008 // unclear how we should encode them. 
This case should be checked earlier 2009 // in predicate methods (isLiteralImm()) 2010 llvm_unreachable("fp literal in 64-bit integer instruction."); 2011 2012 case AMDGPU::OPERAND_REG_IMM_INT32: 2013 case AMDGPU::OPERAND_REG_IMM_FP32: 2014 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2015 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2016 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2017 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2018 case AMDGPU::OPERAND_REG_IMM_INT16: 2019 case AMDGPU::OPERAND_REG_IMM_FP16: 2020 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2021 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2022 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2023 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2024 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2025 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2026 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2027 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2028 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2029 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2030 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2031 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2032 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2033 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2034 bool lost; 2035 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2036 // Convert literal to single precision 2037 FPLiteral.convert(*getOpFltSemantics(OpTy), 2038 APFloat::rmNearestTiesToEven, &lost); 2039 // We allow precision lost but not overflow or underflow. This should be 2040 // checked earlier in isLiteralImm() 2041 2042 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2043 Inst.addOperand(MCOperand::createImm(ImmVal)); 2044 setImmKindLiteral(); 2045 return; 2046 } 2047 default: 2048 llvm_unreachable("invalid operand size"); 2049 } 2050 2051 return; 2052 } 2053 2054 // We got int literal token. 2055 // Only sign extend inline immediates. 
2056 switch (OpTy) { 2057 case AMDGPU::OPERAND_REG_IMM_INT32: 2058 case AMDGPU::OPERAND_REG_IMM_FP32: 2059 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2060 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2061 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2062 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2063 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2064 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2065 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2066 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2067 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2068 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2069 if (isSafeTruncation(Val, 32) && 2070 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2071 AsmParser->hasInv2PiInlineImm())) { 2072 Inst.addOperand(MCOperand::createImm(Val)); 2073 setImmKindConst(); 2074 return; 2075 } 2076 2077 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2078 setImmKindLiteral(); 2079 return; 2080 2081 case AMDGPU::OPERAND_REG_IMM_INT64: 2082 case AMDGPU::OPERAND_REG_IMM_FP64: 2083 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2084 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2085 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2086 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2087 Inst.addOperand(MCOperand::createImm(Val)); 2088 setImmKindConst(); 2089 return; 2090 } 2091 2092 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2093 setImmKindLiteral(); 2094 return; 2095 2096 case AMDGPU::OPERAND_REG_IMM_INT16: 2097 case AMDGPU::OPERAND_REG_IMM_FP16: 2098 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2099 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2100 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2101 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2102 if (isSafeTruncation(Val, 16) && 2103 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2104 AsmParser->hasInv2PiInlineImm())) { 2105 Inst.addOperand(MCOperand::createImm(Val)); 2106 setImmKindConst(); 2107 return; 2108 } 2109 2110 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2111 setImmKindLiteral(); 2112 return; 2113 2114 
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2115 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2116 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2117 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2118 assert(isSafeTruncation(Val, 16)); 2119 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2120 AsmParser->hasInv2PiInlineImm())); 2121 2122 Inst.addOperand(MCOperand::createImm(Val)); 2123 return; 2124 } 2125 default: 2126 llvm_unreachable("invalid operand size"); 2127 } 2128 } 2129 2130 template <unsigned Bitwidth> 2131 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2132 APInt Literal(64, Imm.Val); 2133 setImmKindNone(); 2134 2135 if (!Imm.IsFPImm) { 2136 // We got int literal token. 2137 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2138 return; 2139 } 2140 2141 bool Lost; 2142 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2143 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2144 APFloat::rmNearestTiesToEven, &Lost); 2145 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2146 } 2147 2148 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2149 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2150 } 2151 2152 static bool isInlineValue(unsigned Reg) { 2153 switch (Reg) { 2154 case AMDGPU::SRC_SHARED_BASE: 2155 case AMDGPU::SRC_SHARED_LIMIT: 2156 case AMDGPU::SRC_PRIVATE_BASE: 2157 case AMDGPU::SRC_PRIVATE_LIMIT: 2158 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2159 return true; 2160 case AMDGPU::SRC_VCCZ: 2161 case AMDGPU::SRC_EXECZ: 2162 case AMDGPU::SRC_SCC: 2163 return true; 2164 case AMDGPU::SGPR_NULL: 2165 return true; 2166 default: 2167 return false; 2168 } 2169 } 2170 2171 bool AMDGPUOperand::isInlineValue() const { 2172 return isRegKind() && ::isInlineValue(getReg()); 2173 } 2174 2175 //===----------------------------------------------------------------------===// 2176 // AsmParser 2177 
//===----------------------------------------------------------------------===//

// Map a register kind and width (in 32-bit dwords) to the matching register
// class ID, or -1 if no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  return -1;
}

// Translate a special-register name (and common aliases, with or without the
// "src_" prefix) to its register number; AMDGPU::NoRegister if unrecognized.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}

// MCTargetAsmParser entry point: parse a register, reporting its location.
// Returns true on failure (inverted convention of the local parsers below).
// NOTE(review): RestoreOnFailure is accepted but not forwarded to
// parseRegister() here — confirm whether token restoration is expected to be
// handled further down the call chain.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}

// Non-committal variant: maps a failed parse with pending diagnostics to
// ParseFail, a clean miss to NoMatch; pending errors are swallowed so the
// generic parser can try other interpretations.
OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
                                                       SMLoc &StartLoc,
                                                       SMLoc &EndLoc) {
  bool Result =
      ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
  bool PendingErrors = getParser().hasPendingError();
  getParser().clearPendingErrors();
  if (PendingErrors)
    return MatchOperand_ParseFail;
  if (Result)
    return MatchOperand_NoMatch;
  return MatchOperand_Success;
}

// Fold register Reg1 into the list being accumulated in (Reg, RegWidth).
// Matching lo/hi halves of special registers merge into the 64-bit alias;
// regular registers must have consecutive indices and simply widen the span.
// Emits a diagnostic and returns false on a malformed list.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            SMLoc Loc) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    Error(Loc, "register does not fit in the list");
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_AGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      Error(Loc, "registers in a list must have consecutive indices");
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// A regular-register-file name prefix and its kind.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};

// Recognized register-file prefixes. "acc" is listed before "a" so the
// longer prefix wins in getRegularRegInfo's first-match scan.
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};

static bool isRegularReg(RegisterKind Kind) {
  return Kind == IS_VGPR ||
         Kind == IS_SGPR ||
         Kind == IS_TTMP ||
         Kind == IS_AGPR;
}

// First prefix in RegularRegisters that Str starts with, or nullptr.
static const RegInfo* getRegularRegInfo(StringRef Str) {
  for (const RegInfo &Reg : RegularRegisters)
    if (Str.startswith(Reg.Name))
      return &Reg;
  return nullptr;
}

// Parse a decimal register index; false if Str is not a plain number.
static bool getRegNum(StringRef Str, unsigned& Num) {
  return !Str.getAsInteger(10, Num);
}

// Purely syntactic lookahead check (no tokens consumed): does Token
// (optionally with NextToken) start a register reference?
bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef Str = Token.getString();
  const RegInfo *Reg = getRegularRegInfo(Str);
  if (Reg) {
    StringRef RegName = Reg->Name;
    StringRef RegSuffix = Str.substr(RegName.size());
    if (!RegSuffix.empty()) {
      unsigned Num;
      // A single register with an index: rXX
      if (getRegNum(RegSuffix, Num))
        return true;
    } else {
      // A range of registers: r[XX:YY].
      if (NextToken.is(AsmToken::LBrac))
        return true;
    }
  }

  return getSpecialRegForName(Str) != AMDGPU::NoRegister;
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

// Compute the MC register for kind/index/width, enforcing the SGPR/TTMP
// alignment requirement (index must be a multiple of min(width, 4) dwords).
// Emits a diagnostic and returns NoRegister on any violation.
unsigned
AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
                               unsigned RegNum,
                               unsigned RegWidth,
                               SMLoc Loc) {

  assert(isRegularReg(RegKind));

  unsigned AlignSize = 1;
  if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
    // SGPR and TTMP registers must be aligned.
    // Max required alignment is 4 dwords.
    AlignSize = std::min(RegWidth, 4u);
  }

  if (RegNum % AlignSize != 0) {
    Error(Loc, "invalid register alignment");
    return AMDGPU::NoRegister;
  }

  unsigned RegIdx = RegNum / AlignSize;
  int RCID = getRegClass(RegKind, RegWidth);
  if (RCID == -1) {
    Error(Loc, "invalid or unsupported register size");
    return AMDGPU::NoRegister;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  const MCRegisterClass RC = TRI->getRegClass(RCID);
  if (RegIdx >= RC.getNumRegs()) {
    Error(Loc, "register index is out of range");
    return AMDGPU::NoRegister;
  }

  return RC.getRegister(RegIdx);
}

// Parse a bracketed index range "[lo]" or "[lo:hi]", producing the starting
// index and the width in registers. Emits diagnostics and returns false on
// malformed input.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
} 2491 2492 if (!isUInt<32>(RegHi)) { 2493 Error(SecondIdxLoc, "invalid register index"); 2494 return false; 2495 } 2496 2497 if (RegLo > RegHi) { 2498 Error(FirstIdxLoc, "first register index should not exceed second index"); 2499 return false; 2500 } 2501 2502 Num = static_cast<unsigned>(RegLo); 2503 Width = (RegHi - RegLo) + 1; 2504 return true; 2505 } 2506 2507 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2508 unsigned &RegNum, unsigned &RegWidth, 2509 SmallVectorImpl<AsmToken> &Tokens) { 2510 assert(isToken(AsmToken::Identifier)); 2511 unsigned Reg = getSpecialRegForName(getTokenStr()); 2512 if (Reg) { 2513 RegNum = 0; 2514 RegWidth = 1; 2515 RegKind = IS_SPECIAL; 2516 Tokens.push_back(getToken()); 2517 lex(); // skip register name 2518 } 2519 return Reg; 2520 } 2521 2522 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2523 unsigned &RegNum, unsigned &RegWidth, 2524 SmallVectorImpl<AsmToken> &Tokens) { 2525 assert(isToken(AsmToken::Identifier)); 2526 StringRef RegName = getTokenStr(); 2527 auto Loc = getLoc(); 2528 2529 const RegInfo *RI = getRegularRegInfo(RegName); 2530 if (!RI) { 2531 Error(Loc, "invalid register name"); 2532 return AMDGPU::NoRegister; 2533 } 2534 2535 Tokens.push_back(getToken()); 2536 lex(); // skip register name 2537 2538 RegKind = RI->Kind; 2539 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2540 if (!RegSuffix.empty()) { 2541 // Single 32-bit register: vXX. 2542 if (!getRegNum(RegSuffix, RegNum)) { 2543 Error(Loc, "invalid register index"); 2544 return AMDGPU::NoRegister; 2545 } 2546 RegWidth = 1; 2547 } else { 2548 // Range of registers: v[XX:YY]. ":YY" is optional. 
2549 if (!ParseRegRange(RegNum, RegWidth)) 2550 return AMDGPU::NoRegister; 2551 } 2552 2553 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2554 } 2555 2556 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2557 unsigned &RegWidth, 2558 SmallVectorImpl<AsmToken> &Tokens) { 2559 unsigned Reg = AMDGPU::NoRegister; 2560 auto ListLoc = getLoc(); 2561 2562 if (!skipToken(AsmToken::LBrac, 2563 "expected a register or a list of registers")) { 2564 return AMDGPU::NoRegister; 2565 } 2566 2567 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2568 2569 auto Loc = getLoc(); 2570 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2571 return AMDGPU::NoRegister; 2572 if (RegWidth != 1) { 2573 Error(Loc, "expected a single 32-bit register"); 2574 return AMDGPU::NoRegister; 2575 } 2576 2577 for (; trySkipToken(AsmToken::Comma); ) { 2578 RegisterKind NextRegKind; 2579 unsigned NextReg, NextRegNum, NextRegWidth; 2580 Loc = getLoc(); 2581 2582 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2583 NextRegNum, NextRegWidth, 2584 Tokens)) { 2585 return AMDGPU::NoRegister; 2586 } 2587 if (NextRegWidth != 1) { 2588 Error(Loc, "expected a single 32-bit register"); 2589 return AMDGPU::NoRegister; 2590 } 2591 if (NextRegKind != RegKind) { 2592 Error(Loc, "registers in a list must be of the same kind"); 2593 return AMDGPU::NoRegister; 2594 } 2595 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2596 return AMDGPU::NoRegister; 2597 } 2598 2599 if (!skipToken(AsmToken::RBrac, 2600 "expected a comma or a closing square bracket")) { 2601 return AMDGPU::NoRegister; 2602 } 2603 2604 if (isRegularReg(RegKind)) 2605 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2606 2607 return Reg; 2608 } 2609 2610 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2611 unsigned &RegNum, unsigned &RegWidth, 2612 SmallVectorImpl<AsmToken> &Tokens) { 2613 auto Loc = getLoc(); 2614 Reg = AMDGPU::NoRegister; 2615 2616 
if (isToken(AsmToken::Identifier)) { 2617 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2618 if (Reg == AMDGPU::NoRegister) 2619 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2620 } else { 2621 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2622 } 2623 2624 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2625 if (Reg == AMDGPU::NoRegister) { 2626 assert(Parser.hasPendingError()); 2627 return false; 2628 } 2629 2630 if (!subtargetHasRegister(*TRI, Reg)) { 2631 if (Reg == AMDGPU::SGPR_NULL) { 2632 Error(Loc, "'null' operand is not supported on this GPU"); 2633 } else { 2634 Error(Loc, "register not available on this GPU"); 2635 } 2636 return false; 2637 } 2638 2639 return true; 2640 } 2641 2642 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2643 unsigned &RegNum, unsigned &RegWidth, 2644 bool RestoreOnFailure /*=false*/) { 2645 Reg = AMDGPU::NoRegister; 2646 2647 SmallVector<AsmToken, 1> Tokens; 2648 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2649 if (RestoreOnFailure) { 2650 while (!Tokens.empty()) { 2651 getLexer().UnLex(Tokens.pop_back_val()); 2652 } 2653 } 2654 return true; 2655 } 2656 return false; 2657 } 2658 2659 Optional<StringRef> 2660 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2661 switch (RegKind) { 2662 case IS_VGPR: 2663 return StringRef(".amdgcn.next_free_vgpr"); 2664 case IS_SGPR: 2665 return StringRef(".amdgcn.next_free_sgpr"); 2666 default: 2667 return None; 2668 } 2669 } 2670 2671 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2672 auto SymbolName = getGprCountSymbolName(RegKind); 2673 assert(SymbolName && "initializing invalid register kind"); 2674 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2675 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2676 } 2677 2678 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2679 unsigned DwordRegIndex, 2680 unsigned 
RegWidth) { 2681 // Symbols are only defined for GCN targets 2682 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2683 return true; 2684 2685 auto SymbolName = getGprCountSymbolName(RegKind); 2686 if (!SymbolName) 2687 return true; 2688 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2689 2690 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2691 int64_t OldCount; 2692 2693 if (!Sym->isVariable()) 2694 return !Error(getLoc(), 2695 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2696 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2697 return !Error( 2698 getLoc(), 2699 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2700 2701 if (OldCount <= NewMax) 2702 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2703 2704 return true; 2705 } 2706 2707 std::unique_ptr<AMDGPUOperand> 2708 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2709 const auto &Tok = getToken(); 2710 SMLoc StartLoc = Tok.getLoc(); 2711 SMLoc EndLoc = Tok.getEndLoc(); 2712 RegisterKind RegKind; 2713 unsigned Reg, RegNum, RegWidth; 2714 2715 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2716 return nullptr; 2717 } 2718 if (isHsaAbiVersion3(&getSTI())) { 2719 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2720 return nullptr; 2721 } else 2722 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2723 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2724 } 2725 2726 OperandMatchResultTy 2727 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2728 // TODO: add syntactic sugar for 1/(2*PI) 2729 2730 assert(!isRegister()); 2731 assert(!isModifier()); 2732 2733 const auto& Tok = getToken(); 2734 const auto& NextTok = peekToken(); 2735 bool IsReal = Tok.is(AsmToken::Real); 2736 SMLoc S = getLoc(); 2737 bool Negate = false; 2738 2739 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2740 lex(); 2741 IsReal = true; 2742 Negate = true; 
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.

    StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    // FP immediates carry the raw double bit pattern (IsFPImm = true).
    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  } else {
    int64_t IntVal;
    const MCExpr *Expr;
    SMLoc S = getLoc();

    if (HasSP3AbsModifier) {
      // This is a workaround for handling expressions
      // as arguments of SP3 'abs' modifier, for example:
      //     |1.0|
      //     |-1|
      //     |1+x|
      // This syntax is not compatible with syntax of standard
      // MC expressions (due to the trailing '|').
      SMLoc EndLoc;
      if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
        return MatchOperand_ParseFail;
    } else {
      if (Parser.parseExpression(Expr))
        return MatchOperand_ParseFail;
    }

    // Constant-foldable expressions become plain immediates; anything else
    // is kept as a deferred MC expression operand.
    if (Expr->evaluateAsAbsolute(IntVal)) {
      Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    } else {
      Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    }

    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Parse a register operand if one is present; NoMatch when the lookahead is
// not register syntax, ParseFail if it is but parsing failed.
OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

// Parse a register or, failing that, an immediate — but never a modifier
// sequence, which the caller is expected to handle itself.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  if (res != MatchOperand_NoMatch) {
    return res;
  } else if (isModifier()) {
    return MatchOperand_NoMatch;
  } else {
    return parseImm(Operands, HasSP3AbsMod);
  }
}

// True for named operand modifiers of the form abs(...), neg(...), sext(...).
bool
AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
    const auto &str = Token.getString();
    return str == "abs" || str == "neg" || str == "sext";
  }
  return false;
}

// True for opcode modifiers of the form name:value.
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}

// Named modifiers plus the SP3 '|...|' absolute-value syntax.
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}

bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}

// Check if this is an operand modifier or an opcode modifier
// which may look like an expression but it is not. We should
// avoid parsing these modifiers as expressions. Currently
// recognized sequences are:
//   |...|
//   abs(...)
//   neg(...)
//   sext(...)
//   -reg
//   -|...|
//   -abs(...)
//   name:...
// Note that simple opcode modifiers like 'gds' may be parsed as
// expressions; this is a special case. See getExpressionAsToken.
//
bool
AMDGPUAsmParser::isModifier() {

  AsmToken Tok = getToken();
  AsmToken NextToken[2];
  peekTokens(NextToken);

  return isOperandModifier(Tok, NextToken[0]) ||
         (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
         isOpcodeModifierWithVal(Tok, NextToken[0]);
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1  // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1  // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

// Parse a register or immediate with optional FP input modifiers in either
// named form (neg(...), abs(...)) or SP3 form (-x, |x|), rejecting
// mixed/duplicated forms. The parsed modifiers are attached to the operand.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // The two neg spellings are mutually exclusive.
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // Likewise for the two abs spellings.
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once any modifier was consumed, a miss is a hard parse failure.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}

// Parse a register or immediate with an optional integer input modifier
// sext(...), attaching it to the parsed operand.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // After consuming "sext(", a miss is a hard parse failure.
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}

// Register-only variants of the two parsers above (immediates disallowed).
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}

// Parse either the literal keyword "off" (encoded as immediate 0 of type
// ImmTyOff) or a register operand.
OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
  auto Loc = getLoc();
  if (trySkipId("off")) {
    Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
                                                AMDGPUOperand::ImmTyOff, false));
    return MatchOperand_Success;
  }

  if (!isRegister())
    return MatchOperand_NoMatch;

  std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
  if (Reg) {
    Operands.push_back(std::move(Reg));
    return MatchOperand_Success;
  }

  return MatchOperand_ParseFail;

}

// Reject matches that contradict a user-forced encoding (_e32/_e64, DPP,
// SDWA suffix), prefer e32 where the ISA says so, and enforce the dst_sel
// restriction on SDWA v_mac on VI.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}

static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}

// What asm variants we should check
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  return getAllVariants();
}

// Human-readable name of the forced variant, or "" when none is forced.
StringRef AMDGPUAsmParser::getMatchedVariantName() const {
  if (getForcedEncodingSize() == 32)
    return "e32";

  if (isForcedVOP3())
    return "e64";

  if (isForcedSDWA())
    return "sdwa";

  if (isForcedDPP())
    return "dpp";

  return "";
}

// First implicit SGPR-class use (FLAT_SCR/VCC/VCC_LO/VCC_HI/M0) read by the
// instruction, or AMDGPU::NoRegister if there is none.
unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  const unsigned Num = Desc.getNumImplicitUses();
  for (unsigned i = 0; i < Num; ++i) {
    unsigned Reg = Desc.ImplicitUses[i];
    switch (Reg) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::VCC:
    case AMDGPU::VCC_LO:
    case AMDGPU::VCC_HI:
    case AMDGPU::M0:
      return Reg;
    default:
      break;
    }
  }
  return AMDGPU::NoRegister;
}

// NB: This code is correct only when used to check constant
// bus limitations because GFX7 supports no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType ==
AMDGPU::OPERAND_REG_IMM_V2FP16) 3183 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3184 3185 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3186 } 3187 default: 3188 llvm_unreachable("invalid operand size"); 3189 } 3190 } 3191 3192 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3193 if (!isGFX10Plus()) 3194 return 1; 3195 3196 switch (Opcode) { 3197 // 64-bit shift instructions can use only one scalar value input 3198 case AMDGPU::V_LSHLREV_B64_e64: 3199 case AMDGPU::V_LSHLREV_B64_gfx10: 3200 case AMDGPU::V_LSHRREV_B64_e64: 3201 case AMDGPU::V_LSHRREV_B64_gfx10: 3202 case AMDGPU::V_ASHRREV_I64_e64: 3203 case AMDGPU::V_ASHRREV_I64_gfx10: 3204 case AMDGPU::V_LSHL_B64_e64: 3205 case AMDGPU::V_LSHR_B64_e64: 3206 case AMDGPU::V_ASHR_I64_e64: 3207 return 1; 3208 default: 3209 return 2; 3210 } 3211 } 3212 3213 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3214 const MCOperand &MO = Inst.getOperand(OpIdx); 3215 if (MO.isImm()) { 3216 return !isInlineConstant(Inst, OpIdx); 3217 } else if (MO.isReg()) { 3218 auto Reg = MO.getReg(); 3219 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3220 auto PReg = mc2PseudoReg(Reg); 3221 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3222 } else { 3223 return true; 3224 } 3225 } 3226 3227 bool 3228 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3229 const OperandVector &Operands) { 3230 const unsigned Opcode = Inst.getOpcode(); 3231 const MCInstrDesc &Desc = MII.get(Opcode); 3232 unsigned LastSGPR = AMDGPU::NoRegister; 3233 unsigned ConstantBusUseCount = 0; 3234 unsigned NumLiterals = 0; 3235 unsigned LiteralSize; 3236 3237 if (Desc.TSFlags & 3238 (SIInstrFlags::VOPC | 3239 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3240 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3241 SIInstrFlags::SDWA)) { 3242 // Check special imm operands (used by madmk, etc) 3243 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 
3244 ++ConstantBusUseCount; 3245 } 3246 3247 SmallDenseSet<unsigned> SGPRsUsed; 3248 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3249 if (SGPRUsed != AMDGPU::NoRegister) { 3250 SGPRsUsed.insert(SGPRUsed); 3251 ++ConstantBusUseCount; 3252 } 3253 3254 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3255 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3256 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3257 3258 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3259 3260 for (int OpIdx : OpIndices) { 3261 if (OpIdx == -1) break; 3262 3263 const MCOperand &MO = Inst.getOperand(OpIdx); 3264 if (usesConstantBus(Inst, OpIdx)) { 3265 if (MO.isReg()) { 3266 LastSGPR = mc2PseudoReg(MO.getReg()); 3267 // Pairs of registers with a partial intersections like these 3268 // s0, s[0:1] 3269 // flat_scratch_lo, flat_scratch 3270 // flat_scratch_lo, flat_scratch_hi 3271 // are theoretically valid but they are disabled anyway. 3272 // Note that this code mimics SIInstrInfo::verifyInstruction 3273 if (!SGPRsUsed.count(LastSGPR)) { 3274 SGPRsUsed.insert(LastSGPR); 3275 ++ConstantBusUseCount; 3276 } 3277 } else { // Expression or a literal 3278 3279 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3280 continue; // special operand like VINTERP attr_chan 3281 3282 // An instruction may use only one literal. 3283 // This has been validated on the previous step. 3284 // See validateVOP3Literal. 3285 // This literal may be used as more than one operand. 3286 // If all these operands are of the same size, 3287 // this literal counts as one scalar value. 3288 // Otherwise it counts as 2 scalar values. 3289 // See "GFX10 Shader Programming", section 3.6.2.3. 
3290 3291 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3292 if (Size < 4) Size = 4; 3293 3294 if (NumLiterals == 0) { 3295 NumLiterals = 1; 3296 LiteralSize = Size; 3297 } else if (LiteralSize != Size) { 3298 NumLiterals = 2; 3299 } 3300 } 3301 } 3302 } 3303 } 3304 ConstantBusUseCount += NumLiterals; 3305 3306 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3307 return true; 3308 3309 SMLoc LitLoc = getLitLoc(Operands); 3310 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3311 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc; 3312 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3313 return false; 3314 } 3315 3316 bool 3317 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3318 const OperandVector &Operands) { 3319 const unsigned Opcode = Inst.getOpcode(); 3320 const MCInstrDesc &Desc = MII.get(Opcode); 3321 3322 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3323 if (DstIdx == -1 || 3324 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3325 return true; 3326 } 3327 3328 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3329 3330 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3331 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3332 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3333 3334 assert(DstIdx != -1); 3335 const MCOperand &Dst = Inst.getOperand(DstIdx); 3336 assert(Dst.isReg()); 3337 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3338 3339 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3340 3341 for (int SrcIdx : SrcIndices) { 3342 if (SrcIdx == -1) break; 3343 const MCOperand &Src = Inst.getOperand(SrcIdx); 3344 if (Src.isReg()) { 3345 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3346 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3347 Error(getRegLoc(SrcReg, Operands), 3348 "destination must be different than all 
sources"); 3349 return false; 3350 } 3351 } 3352 } 3353 3354 return true; 3355 } 3356 3357 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3358 3359 const unsigned Opc = Inst.getOpcode(); 3360 const MCInstrDesc &Desc = MII.get(Opc); 3361 3362 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3363 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3364 assert(ClampIdx != -1); 3365 return Inst.getOperand(ClampIdx).getImm() == 0; 3366 } 3367 3368 return true; 3369 } 3370 3371 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3372 3373 const unsigned Opc = Inst.getOpcode(); 3374 const MCInstrDesc &Desc = MII.get(Opc); 3375 3376 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3377 return true; 3378 3379 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3380 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3381 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3382 3383 assert(VDataIdx != -1); 3384 3385 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3386 return true; 3387 3388 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3389 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3390 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3391 if (DMask == 0) 3392 DMask = 1; 3393 3394 unsigned DataSize = 3395 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3396 if (hasPackedD16()) { 3397 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3398 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3399 DataSize = (DataSize + 1) / 2; 3400 } 3401 3402 return (VDataSize / 4) == DataSize + TFESize; 3403 } 3404 3405 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3406 const unsigned Opc = Inst.getOpcode(); 3407 const MCInstrDesc &Desc = MII.get(Opc); 3408 3409 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3410 return true; 3411 3412 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3413 3414 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3415 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3416 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3417 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3418 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3419 3420 assert(VAddr0Idx != -1); 3421 assert(SrsrcIdx != -1); 3422 assert(SrsrcIdx > VAddr0Idx); 3423 3424 if (DimIdx == -1) 3425 return true; // intersect_ray 3426 3427 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3428 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3429 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3430 unsigned VAddrSize = 3431 IsNSA ? SrsrcIdx - VAddr0Idx 3432 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3433 3434 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3435 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3436 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3437 (BaseOpcode->LodOrClampOrMip ? 
1 : 0); 3438 if (!IsNSA) { 3439 if (AddrSize > 8) 3440 AddrSize = 16; 3441 else if (AddrSize > 4) 3442 AddrSize = 8; 3443 } 3444 3445 return VAddrSize == AddrSize; 3446 } 3447 3448 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3449 3450 const unsigned Opc = Inst.getOpcode(); 3451 const MCInstrDesc &Desc = MII.get(Opc); 3452 3453 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3454 return true; 3455 if (!Desc.mayLoad() || !Desc.mayStore()) 3456 return true; // Not atomic 3457 3458 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3459 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3460 3461 // This is an incomplete check because image_atomic_cmpswap 3462 // may only use 0x3 and 0xf while other atomic operations 3463 // may use 0x1 and 0x3. However these limitations are 3464 // verified when we check that dmask matches dst size. 3465 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3466 } 3467 3468 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3469 3470 const unsigned Opc = Inst.getOpcode(); 3471 const MCInstrDesc &Desc = MII.get(Opc); 3472 3473 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3474 return true; 3475 3476 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3477 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3478 3479 // GATHER4 instructions use dmask in a different fashion compared to 3480 // other MIMG instructions. The only useful DMASK values are 3481 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3482 // (red,red,red,red) etc.) The ISA document doesn't mention 3483 // this. 
3484 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3485 } 3486 3487 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3488 const unsigned Opc = Inst.getOpcode(); 3489 const MCInstrDesc &Desc = MII.get(Opc); 3490 3491 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3492 return true; 3493 3494 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3495 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3496 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3497 3498 if (!BaseOpcode->MSAA) 3499 return true; 3500 3501 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3502 assert(DimIdx != -1); 3503 3504 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3505 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3506 3507 return DimInfo->MSAA; 3508 } 3509 3510 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3511 { 3512 switch (Opcode) { 3513 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3514 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3515 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3516 return true; 3517 default: 3518 return false; 3519 } 3520 } 3521 3522 // movrels* opcodes should only allow VGPRS as src0. 3523 // This is specified in .td description for vop1/vop3, 3524 // but sdwa is handled differently. See isSDWAOperand. 
3525 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3526 const OperandVector &Operands) { 3527 3528 const unsigned Opc = Inst.getOpcode(); 3529 const MCInstrDesc &Desc = MII.get(Opc); 3530 3531 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3532 return true; 3533 3534 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3535 assert(Src0Idx != -1); 3536 3537 SMLoc ErrLoc; 3538 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3539 if (Src0.isReg()) { 3540 auto Reg = mc2PseudoReg(Src0.getReg()); 3541 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3542 if (!isSGPR(Reg, TRI)) 3543 return true; 3544 ErrLoc = getRegLoc(Reg, Operands); 3545 } else { 3546 ErrLoc = getConstLoc(Operands); 3547 } 3548 3549 Error(ErrLoc, "source operand must be a VGPR"); 3550 return false; 3551 } 3552 3553 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3554 const OperandVector &Operands) { 3555 3556 const unsigned Opc = Inst.getOpcode(); 3557 3558 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3559 return true; 3560 3561 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3562 assert(Src0Idx != -1); 3563 3564 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3565 if (!Src0.isReg()) 3566 return true; 3567 3568 auto Reg = mc2PseudoReg(Src0.getReg()); 3569 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3570 if (isSGPR(Reg, TRI)) { 3571 Error(getRegLoc(Reg, Operands), 3572 "source operand must be either a VGPR or an inline constant"); 3573 return false; 3574 } 3575 3576 return true; 3577 } 3578 3579 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3580 switch (Inst.getOpcode()) { 3581 default: 3582 return true; 3583 case V_DIV_SCALE_F32_gfx6_gfx7: 3584 case V_DIV_SCALE_F32_vi: 3585 case V_DIV_SCALE_F32_gfx10: 3586 case V_DIV_SCALE_F64_gfx6_gfx7: 3587 case V_DIV_SCALE_F64_vi: 3588 case V_DIV_SCALE_F64_gfx10: 3589 break; 3590 } 3591 3592 // TODO: Check that src0 = src1 
or src2. 3593 3594 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3595 AMDGPU::OpName::src2_modifiers, 3596 AMDGPU::OpName::src2_modifiers}) { 3597 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3598 .getImm() & 3599 SISrcMods::ABS) { 3600 return false; 3601 } 3602 } 3603 3604 return true; 3605 } 3606 3607 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3608 3609 const unsigned Opc = Inst.getOpcode(); 3610 const MCInstrDesc &Desc = MII.get(Opc); 3611 3612 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3613 return true; 3614 3615 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3616 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3617 if (isCI() || isSI()) 3618 return false; 3619 } 3620 3621 return true; 3622 } 3623 3624 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3625 const unsigned Opc = Inst.getOpcode(); 3626 const MCInstrDesc &Desc = MII.get(Opc); 3627 3628 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3629 return true; 3630 3631 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3632 if (DimIdx < 0) 3633 return true; 3634 3635 long Imm = Inst.getOperand(DimIdx).getImm(); 3636 if (Imm < 0 || Imm >= 8) 3637 return false; 3638 3639 return true; 3640 } 3641 3642 static bool IsRevOpcode(const unsigned Opcode) 3643 { 3644 switch (Opcode) { 3645 case AMDGPU::V_SUBREV_F32_e32: 3646 case AMDGPU::V_SUBREV_F32_e64: 3647 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3648 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3649 case AMDGPU::V_SUBREV_F32_e32_vi: 3650 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3651 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3652 case AMDGPU::V_SUBREV_F32_e64_vi: 3653 3654 case AMDGPU::V_SUBREV_CO_U32_e32: 3655 case AMDGPU::V_SUBREV_CO_U32_e64: 3656 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3657 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3658 3659 case AMDGPU::V_SUBBREV_U32_e32: 3660 case AMDGPU::V_SUBBREV_U32_e64: 3661 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3662 case 
AMDGPU::V_SUBBREV_U32_e32_vi: 3663 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3664 case AMDGPU::V_SUBBREV_U32_e64_vi: 3665 3666 case AMDGPU::V_SUBREV_U32_e32: 3667 case AMDGPU::V_SUBREV_U32_e64: 3668 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3669 case AMDGPU::V_SUBREV_U32_e32_vi: 3670 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3671 case AMDGPU::V_SUBREV_U32_e64_vi: 3672 3673 case AMDGPU::V_SUBREV_F16_e32: 3674 case AMDGPU::V_SUBREV_F16_e64: 3675 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3676 case AMDGPU::V_SUBREV_F16_e32_vi: 3677 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3678 case AMDGPU::V_SUBREV_F16_e64_vi: 3679 3680 case AMDGPU::V_SUBREV_U16_e32: 3681 case AMDGPU::V_SUBREV_U16_e64: 3682 case AMDGPU::V_SUBREV_U16_e32_vi: 3683 case AMDGPU::V_SUBREV_U16_e64_vi: 3684 3685 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3686 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3687 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3688 3689 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3690 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3691 3692 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3693 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3694 3695 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3696 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3697 3698 case AMDGPU::V_LSHRREV_B32_e32: 3699 case AMDGPU::V_LSHRREV_B32_e64: 3700 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3701 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3702 case AMDGPU::V_LSHRREV_B32_e32_vi: 3703 case AMDGPU::V_LSHRREV_B32_e64_vi: 3704 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3705 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3706 3707 case AMDGPU::V_ASHRREV_I32_e32: 3708 case AMDGPU::V_ASHRREV_I32_e64: 3709 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3710 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3711 case AMDGPU::V_ASHRREV_I32_e32_vi: 3712 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3713 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3714 case AMDGPU::V_ASHRREV_I32_e64_vi: 3715 3716 case AMDGPU::V_LSHLREV_B32_e32: 3717 case AMDGPU::V_LSHLREV_B32_e64: 3718 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3719 case 
AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3720 case AMDGPU::V_LSHLREV_B32_e32_vi: 3721 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3722 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3723 case AMDGPU::V_LSHLREV_B32_e64_vi: 3724 3725 case AMDGPU::V_LSHLREV_B16_e32: 3726 case AMDGPU::V_LSHLREV_B16_e64: 3727 case AMDGPU::V_LSHLREV_B16_e32_vi: 3728 case AMDGPU::V_LSHLREV_B16_e64_vi: 3729 case AMDGPU::V_LSHLREV_B16_gfx10: 3730 3731 case AMDGPU::V_LSHRREV_B16_e32: 3732 case AMDGPU::V_LSHRREV_B16_e64: 3733 case AMDGPU::V_LSHRREV_B16_e32_vi: 3734 case AMDGPU::V_LSHRREV_B16_e64_vi: 3735 case AMDGPU::V_LSHRREV_B16_gfx10: 3736 3737 case AMDGPU::V_ASHRREV_I16_e32: 3738 case AMDGPU::V_ASHRREV_I16_e64: 3739 case AMDGPU::V_ASHRREV_I16_e32_vi: 3740 case AMDGPU::V_ASHRREV_I16_e64_vi: 3741 case AMDGPU::V_ASHRREV_I16_gfx10: 3742 3743 case AMDGPU::V_LSHLREV_B64_e64: 3744 case AMDGPU::V_LSHLREV_B64_gfx10: 3745 case AMDGPU::V_LSHLREV_B64_vi: 3746 3747 case AMDGPU::V_LSHRREV_B64_e64: 3748 case AMDGPU::V_LSHRREV_B64_gfx10: 3749 case AMDGPU::V_LSHRREV_B64_vi: 3750 3751 case AMDGPU::V_ASHRREV_I64_e64: 3752 case AMDGPU::V_ASHRREV_I64_gfx10: 3753 case AMDGPU::V_ASHRREV_I64_vi: 3754 3755 case AMDGPU::V_PK_LSHLREV_B16: 3756 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3757 case AMDGPU::V_PK_LSHLREV_B16_vi: 3758 3759 case AMDGPU::V_PK_LSHRREV_B16: 3760 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3761 case AMDGPU::V_PK_LSHRREV_B16_vi: 3762 case AMDGPU::V_PK_ASHRREV_I16: 3763 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3764 case AMDGPU::V_PK_ASHRREV_I16_vi: 3765 return true; 3766 default: 3767 return false; 3768 } 3769 } 3770 3771 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3772 3773 using namespace SIInstrFlags; 3774 const unsigned Opcode = Inst.getOpcode(); 3775 const MCInstrDesc &Desc = MII.get(Opcode); 3776 3777 // lds_direct register is defined so that it can be used 3778 // with 9-bit operands only. Ignore encodings which do not accept these. 
3779 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3780 return true; 3781 3782 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3783 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3784 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3785 3786 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3787 3788 // lds_direct cannot be specified as either src1 or src2. 3789 for (int SrcIdx : SrcIndices) { 3790 if (SrcIdx == -1) break; 3791 const MCOperand &Src = Inst.getOperand(SrcIdx); 3792 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3793 return false; 3794 } 3795 } 3796 3797 if (Src0Idx == -1) 3798 return true; 3799 3800 const MCOperand &Src = Inst.getOperand(Src0Idx); 3801 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3802 return true; 3803 3804 // lds_direct is specified as src0. Check additional limitations. 3805 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3806 } 3807 3808 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3809 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3810 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3811 if (Op.isFlatOffset()) 3812 return Op.getStartLoc(); 3813 } 3814 return getLoc(); 3815 } 3816 3817 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3818 const OperandVector &Operands) { 3819 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3820 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3821 return true; 3822 3823 auto Opcode = Inst.getOpcode(); 3824 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3825 assert(OpNum != -1); 3826 3827 const auto &Op = Inst.getOperand(OpNum); 3828 if (!hasFlatOffsets() && Op.getImm() != 0) { 3829 Error(getFlatOffsetLoc(Operands), 3830 "flat offset modifier is not supported on this GPU"); 3831 return false; 3832 } 3833 3834 // For FLAT segment the offset must be positive; 
3835 // MSB is ignored and forced to zero. 3836 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3837 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3838 if (!isIntN(OffsetSize, Op.getImm())) { 3839 Error(getFlatOffsetLoc(Operands), 3840 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3841 return false; 3842 } 3843 } else { 3844 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3845 if (!isUIntN(OffsetSize, Op.getImm())) { 3846 Error(getFlatOffsetLoc(Operands), 3847 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3848 return false; 3849 } 3850 } 3851 3852 return true; 3853 } 3854 3855 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3856 // Start with second operand because SMEM Offset cannot be dst or src0. 3857 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3858 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3859 if (Op.isSMEMOffset()) 3860 return Op.getStartLoc(); 3861 } 3862 return getLoc(); 3863 } 3864 3865 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3866 const OperandVector &Operands) { 3867 if (isCI() || isSI()) 3868 return true; 3869 3870 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3871 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3872 return true; 3873 3874 auto Opcode = Inst.getOpcode(); 3875 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3876 if (OpNum == -1) 3877 return true; 3878 3879 const auto &Op = Inst.getOperand(OpNum); 3880 if (!Op.isImm()) 3881 return true; 3882 3883 uint64_t Offset = Op.getImm(); 3884 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3885 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3886 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3887 return true; 3888 3889 Error(getSMEMOffsetLoc(Operands), 3890 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3891 "expected a 21-bit signed offset"); 3892 3893 return false; 3894 } 3895 3896 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3897 unsigned Opcode = Inst.getOpcode(); 3898 const MCInstrDesc &Desc = MII.get(Opcode); 3899 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3900 return true; 3901 3902 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3903 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3904 3905 const int OpIndices[] = { Src0Idx, Src1Idx }; 3906 3907 unsigned NumExprs = 0; 3908 unsigned NumLiterals = 0; 3909 uint32_t LiteralValue; 3910 3911 for (int OpIdx : OpIndices) { 3912 if (OpIdx == -1) break; 3913 3914 const MCOperand &MO = Inst.getOperand(OpIdx); 3915 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3916 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3917 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3918 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3919 if (NumLiterals == 0 || LiteralValue != Value) { 3920 LiteralValue = Value; 3921 ++NumLiterals; 3922 } 3923 } else if (MO.isExpr()) { 3924 ++NumExprs; 3925 } 3926 } 3927 } 3928 3929 return NumLiterals + NumExprs <= 1; 3930 } 3931 3932 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3933 const unsigned Opc = Inst.getOpcode(); 3934 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3935 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3936 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3937 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3938 3939 if (OpSel & ~3) 3940 return false; 3941 } 3942 return true; 3943 } 3944 3945 // Check if VCC register matches wavefront size 3946 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3947 auto FB = getFeatureBits(); 3948 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3949 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 
3950 } 3951 3952 // VOP3 literal is only allowed in GFX10+ and only one can be used 3953 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3954 const OperandVector &Operands) { 3955 unsigned Opcode = Inst.getOpcode(); 3956 const MCInstrDesc &Desc = MII.get(Opcode); 3957 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3958 return true; 3959 3960 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3961 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3962 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3963 3964 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3965 3966 unsigned NumExprs = 0; 3967 unsigned NumLiterals = 0; 3968 uint32_t LiteralValue; 3969 3970 for (int OpIdx : OpIndices) { 3971 if (OpIdx == -1) break; 3972 3973 const MCOperand &MO = Inst.getOperand(OpIdx); 3974 if (!MO.isImm() && !MO.isExpr()) 3975 continue; 3976 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3977 continue; 3978 3979 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3980 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3981 Error(getConstLoc(Operands), 3982 "inline constants are not allowed for this operand"); 3983 return false; 3984 } 3985 3986 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3987 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3988 if (NumLiterals == 0 || LiteralValue != Value) { 3989 LiteralValue = Value; 3990 ++NumLiterals; 3991 } 3992 } else if (MO.isExpr()) { 3993 ++NumExprs; 3994 } 3995 } 3996 NumLiterals += NumExprs; 3997 3998 if (!NumLiterals) 3999 return true; 4000 4001 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4002 Error(getLitLoc(Operands), "literal operands are not supported"); 4003 return false; 4004 } 4005 4006 if (NumLiterals > 1) { 4007 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4008 return false; 4009 } 4010 4011 return true; 4012 } 4013 4014 // Returns -1 if not a 
register, 0 if VGPR and 1 if AGPR. 4015 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4016 const MCRegisterInfo *MRI) { 4017 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4018 if (OpIdx < 0) 4019 return -1; 4020 4021 const MCOperand &Op = Inst.getOperand(OpIdx); 4022 if (!Op.isReg()) 4023 return -1; 4024 4025 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4026 auto Reg = Sub ? Sub : Op.getReg(); 4027 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4028 return AGRP32.contains(Reg) ? 1 : 0; 4029 } 4030 4031 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4032 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4033 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4034 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4035 SIInstrFlags::DS)) == 0) 4036 return true; 4037 4038 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4039 : AMDGPU::OpName::vdata; 4040 4041 const MCRegisterInfo *MRI = getMRI(); 4042 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4043 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4044 4045 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4046 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4047 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4048 return false; 4049 } 4050 4051 auto FB = getFeatureBits(); 4052 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4053 if (DataAreg < 0 || DstAreg < 0) 4054 return true; 4055 return DstAreg == DataAreg; 4056 } 4057 4058 return DstAreg < 1 && DataAreg < 1; 4059 } 4060 4061 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4062 auto FB = getFeatureBits(); 4063 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4064 return true; 4065 4066 const MCRegisterInfo *MRI = getMRI(); 4067 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4068 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4069 for 
(unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4070 const MCOperand &Op = Inst.getOperand(I); 4071 if (!Op.isReg()) 4072 continue; 4073 4074 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4075 if (!Sub) 4076 continue; 4077 4078 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4079 return false; 4080 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4081 return false; 4082 } 4083 4084 return true; 4085 } 4086 4087 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4088 const OperandVector &Operands, 4089 const SMLoc &IDLoc) { 4090 int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4091 AMDGPU::OpName::cpol); 4092 if (CPolPos == -1) 4093 return true; 4094 4095 unsigned CPol = Inst.getOperand(CPolPos).getImm(); 4096 4097 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4098 if ((TSFlags & (SIInstrFlags::SMRD)) && 4099 (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) { 4100 Error(IDLoc, "invalid cache policy for SMRD instruction"); 4101 return false; 4102 } 4103 4104 if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet))) 4105 return true; 4106 4107 if (TSFlags & SIInstrFlags::IsAtomicRet) { 4108 if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) { 4109 Error(IDLoc, "instruction must use glc"); 4110 return false; 4111 } 4112 } else { 4113 if (CPol & CPol::GLC) { 4114 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4115 StringRef CStr(S.getPointer()); 4116 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]); 4117 Error(S, "instruction must not use glc"); 4118 return false; 4119 } 4120 } 4121 4122 if (isGFX90A() && (CPol & CPol::SCC) && (TSFlags & SIInstrFlags::FPAtomic)) { 4123 SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands); 4124 StringRef CStr(S.getPointer()); 4125 S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]); 4126 Error(S, "instruction must not use scc"); 4127 return false; 4128 } 4129 4130 return true; 4131 } 4132 4133 bool 
AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4134 const SMLoc &IDLoc, 4135 const OperandVector &Operands) { 4136 if (!validateLdsDirect(Inst)) { 4137 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 4138 "invalid use of lds_direct"); 4139 return false; 4140 } 4141 if (!validateSOPLiteral(Inst)) { 4142 Error(getLitLoc(Operands), 4143 "only one literal operand is allowed"); 4144 return false; 4145 } 4146 if (!validateVOP3Literal(Inst, Operands)) { 4147 return false; 4148 } 4149 if (!validateConstantBusLimitations(Inst, Operands)) { 4150 return false; 4151 } 4152 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4153 return false; 4154 } 4155 if (!validateIntClampSupported(Inst)) { 4156 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4157 "integer clamping is not supported on this GPU"); 4158 return false; 4159 } 4160 if (!validateOpSel(Inst)) { 4161 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4162 "invalid op_sel operand"); 4163 return false; 4164 } 4165 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
4166 if (!validateMIMGD16(Inst)) { 4167 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4168 "d16 modifier is not supported on this GPU"); 4169 return false; 4170 } 4171 if (!validateMIMGDim(Inst)) { 4172 Error(IDLoc, "dim modifier is required on this GPU"); 4173 return false; 4174 } 4175 if (!validateMIMGMSAA(Inst)) { 4176 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4177 "invalid dim; must be MSAA type"); 4178 return false; 4179 } 4180 if (!validateMIMGDataSize(Inst)) { 4181 Error(IDLoc, 4182 "image data size does not match dmask and tfe"); 4183 return false; 4184 } 4185 if (!validateMIMGAddrSize(Inst)) { 4186 Error(IDLoc, 4187 "image address size does not match dim and a16"); 4188 return false; 4189 } 4190 if (!validateMIMGAtomicDMask(Inst)) { 4191 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4192 "invalid atomic image dmask"); 4193 return false; 4194 } 4195 if (!validateMIMGGatherDMask(Inst)) { 4196 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4197 "invalid image_gather dmask: only one bit must be set"); 4198 return false; 4199 } 4200 if (!validateMovrels(Inst, Operands)) { 4201 return false; 4202 } 4203 if (!validateFlatOffset(Inst, Operands)) { 4204 return false; 4205 } 4206 if (!validateSMEMOffset(Inst, Operands)) { 4207 return false; 4208 } 4209 if (!validateMAIAccWrite(Inst, Operands)) { 4210 return false; 4211 } 4212 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4213 return false; 4214 } 4215 4216 if (!validateAGPRLdSt(Inst)) { 4217 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4218 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4219 : "invalid register class: agpr loads and stores not supported on this GPU" 4220 ); 4221 return false; 4222 } 4223 if (!validateVGPRAlign(Inst)) { 4224 Error(IDLoc, 4225 "invalid register class: vgpr tuples must be 64 bit aligned"); 4226 return false; 4227 } 4228 4229 if (!validateDivScale(Inst)) { 4230 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4231 return false; 4232 } 4233 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4234 return false; 4235 } 4236 4237 return true; 4238 } 4239 4240 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4241 const FeatureBitset &FBS, 4242 unsigned VariantID = 0); 4243 4244 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4245 const FeatureBitset &AvailableFeatures, 4246 unsigned VariantID); 4247 4248 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4249 const FeatureBitset &FBS) { 4250 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4251 } 4252 4253 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4254 const FeatureBitset &FBS, 4255 ArrayRef<unsigned> Variants) { 4256 for (auto Variant : Variants) { 4257 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4258 return true; 4259 } 4260 4261 return false; 4262 } 4263 4264 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4265 const SMLoc &IDLoc) { 4266 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4267 4268 // Check if requested instruction variant is supported. 4269 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4270 return false; 4271 4272 // This instruction is not supported. 4273 // Clear any other pending errors because they are no longer relevant. 4274 getParser().clearPendingErrors(); 4275 4276 // Requested instruction variant is not supported. 4277 // Check if any other variants are supported. 
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    // Mnemonic exists, just not in the variant the user requested.
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}

// Top-level match entry point: tries each matcher variant, keeps the most
// specific failure status, validates a successful match, then emits it.
// Returns true on error (diagnostic already emitted), false on success.
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most
    // specific status as the resulting one:
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature
    //   < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  // The mnemonic itself is valid on this GPU; report the most specific
  // failure recorded above.
  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}

// Parse an absolute expression into Ret (truncated to 32 bits).
// Returns true on failure; Ret is untouched unless parsing succeeds.
bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
  int64_t Tmp = -1;
  if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
    return true;
  }
  if (getParser().parseAbsoluteExpression(Tmp)) {
    return true;
  }
  Ret = static_cast<uint32_t>(Tmp);
  return false;
}

// Parse "<major>, <minor>" for the HSA code-object directives.
// Returns true (with a diagnostic) on malformed input.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}

// Parse the .amdgcn_target directive: the quoted target string must match
// the target implied by the current subtarget options.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string Target;

  SMLoc TargetStart = getLoc();
  if (getParser().parseEscapedString(Target))
    return true;
  SMRange TargetRange = SMRange(TargetStart, getLoc());

  // Build the expected target string from the subtarget and compare.
  std::string ExpectedTarget;
  raw_string_ostream ExpectedTargetOS(ExpectedTarget);
  IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS);

  if (Target != ExpectedTargetOS.str())
    return Error(TargetRange.Start, "target must match options", TargetRange);

  getTargetStreamer().EmitDirectiveAMDGCNTarget(Target);
  return false;
}

// Emit a generic "value out of range" diagnostic for Range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}

// Convert the raw next-free VGPR/SGPR counts from .amdhsa_kernel into the
// granulated block counts encoded in the kernel descriptor, applying
// per-ISA-version rules (extra SGPRs for VCC/flat-scratch/XNACK, the
// SGPR-init hardware bug, and the gfx10+ "SGPRs not allocated" rule).
// Returns true (via OutOfRangeError) if a count exceeds the addressable
// limit.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  if (Version.Major >= 10)
    // gfx10+ does not allocate SGPRs per-wave; the field is hard-wired to 0.
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // gfx8+ without the init bug: check the user count before adding the
    // implicitly-reserved extra SGPRs.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // gfx7 and earlier (or with the init bug): check after adding extras.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

bool
// Parse the .amdhsa_kernel directive block: reads kernel-descriptor
// sub-directives until .end_amdhsa_kernel, accumulates them into a
// kernel_descriptor_t, derives the granulated GPR block counts, and emits
// the descriptor through the target streamer. Each sub-directive may
// appear at most once. Returns true on error.
AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());

  // Tracks which sub-directives have appeared, both to reject repeats and
  // to enforce required directives after the loop.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  uint64_t AccumOffset = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();
  Optional<bool> EnableWavefrontSize32;

  while (true) {
    while (trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    SMRange IDRange = getTok().getLocRange();
    if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
      return true;

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the descriptor field width, then pack it.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      // User SGPR directives also accumulate UserSGPRCount by the number of
      // SGPRs each enabled preloaded value occupies.
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      if (Val)
        UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      if (Val)
        UserSGPRCount += 1;
    } else if (ID == ".amdhsa_wavefront_size32") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      // Remembered separately: it also feeds the VGPR block granule in
      // calculateGPRBlocks below.
      EnableWavefrontSize32 = Val;
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                       Val, ValRange);
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_accum_offset") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      AccumOffset = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return Error(IDRange.Start, "directive requires gfx7+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return Error(IDRange.Start, "directive requires gfx8+", IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return Error(IDRange.Start, "directive requires gfx9+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_tg_split") {
      if (!isGFX90A())
        return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
                       ValRange);
    } else if (ID == ".amdhsa_workgroup_processor_mode") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
                       ValRange);
    } else if (ID == ".amdhsa_memory_ordered") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
                       ValRange);
    } else if (ID == ".amdhsa_forward_progress") {
      if (IVersion.Major < 10)
        return Error(IDRange.Start, "directive requires gfx10+", IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  // These two directives have no usable default and must be present.
  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR,
                         VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
                         SGPRBlocks))
    return true;

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  if (isGFX90A()) {
    // accum_offset is mandatory on gfx90a and is encoded as (offset/4 - 1).
    if (Seen.find(".amdhsa_accum_offset") == Seen.end())
      return TokError(".amdhsa_accum_offset directive is required");
    if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
      return TokError("accum_offset should be in range [4..256] in "
                      "increments of 4");
    if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
      return TokError("accum_offset exceeds total VGPR allocation");
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
                    (AccumOffset / 4 - 1));
  }

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

bool
// Parse .hsa_code_object_version: "<major>, <minor>".
AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

// Parse .hsa_code_object_isa. With no arguments, the subtarget's own ISA
// version is emitted; otherwise expects
// "<major>, <minor>, <stepping>, <vendor>, <arch>".
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (isToken(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("vendor name required, comma expected");

  if (!parseString(VendorName, "invalid vendor name"))
    return true;

  if (!trySkipToken(AsmToken::Comma))
    return TokError("arch name required, comma expected");

  if (!parseString(ArchName, "invalid arch name"))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

// Parse a single "key = value" field inside .amd_kernel_code_t into Header,
// then cross-check a few fields against the current subtarget features.
// Returns true on error.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // Wavefront-size settings must agree with the subtarget's feature bits.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is log2: 5 => wave32, 6 => wave64.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  // The remaining bits only exist on GFX10+.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}

// Parse the .amd_kernel_code_t block: repeatedly parse key/value fields
// until .end_amd_kernel_code_t, then emit the populated header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement. This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(trySkipToken(AsmToken::EndOfStatement));

    StringRef ID;
    if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
      return true;

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Parse .amdgpu_hsa_kernel <name>: marks the symbol as an HSA kernel and
// resets the per-kernel register-usage tracking scope.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  StringRef KernelName;
  if (!parseId(KernelName, "expected symbol name"))
    return true;

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);

  KernelScope.initialize(getContext());
  return false;
}

// Parse .amd_amdgpu_isa: the quoted ISA string must match the one derived
// from the subtarget (triple + mcpu).
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getToken().getStringContents();

  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

// Parse the HSA metadata block (YAML for V2, MsgPack-source for V3); the
// begin/end directive pair depends on the HSA ABI version in use.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      isHsaAbiVersion3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (isHsaAbiVersion3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getLoc(), "invalid HSA metadata");
  }

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Whitespace is significant inside the collected block, so temporarily
  // stop the lexer from skipping it.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Preserve leading whitespace on each line.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Copy the rest of the statement, re-inserting the statement separator
    // the lexer would otherwise consume.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal lexing before returning, on both paths.
  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}

/// Parse the assembler directive for new MsgPack-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
  std::string String;
  if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
                          AMDGPU::PALMD::AssemblerDirectiveEnd, String))
    return true;

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  if (!PALMetadata->setFromString(String))
    return Error(getLoc(), "invalid PAL metadata");
  return false;
}

/// Parse the assembler directive for old linear-format PAL metadata.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  // Legacy PAL metadata is a flat, comma-separated list of key/value
  // register pairs; an odd number of values is a parse error.
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (!trySkipToken(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (!trySkipToken(AsmToken::Comma))
      break;
  }
  return false;
}

/// ParseDirectiveAMDGPULDS
///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
  if (getParser().checkForValidSection())
    return true;

  StringRef Name;
  SMLoc NameLoc = getLoc();
  if (getParser().parseIdentifier(Name))
    return TokError("expected identifier in directive");

  MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
  if (parseToken(AsmToken::Comma, "expected ','"))
    return true;

  unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());

  // The size must fit within the subtarget's LDS.
  int64_t Size;
  SMLoc SizeLoc = getLoc();
  if (getParser().parseAbsoluteExpression(Size))
    return true;
  if (Size < 0)
    return Error(SizeLoc, "size must be non-negative");
  if (Size > LocalMemorySize)
    return Error(SizeLoc, "size is too large");

  // Alignment is optional and defaults to 4 bytes.
  int64_t Alignment = 4;
  if (trySkipToken(AsmToken::Comma)) {
    SMLoc AlignLoc = getLoc();
    if (getParser().parseAbsoluteExpression(Alignment))
      return true;
    if (Alignment < 0 || !isPowerOf2_64(Alignment))
      return Error(AlignLoc, "alignment must be a power of two");

    // Alignment larger than the size of LDS is possible in theory, as long
    // as the linker manages to place the symbol at address 0, but we do want
    // to make sure the alignment fits nicely into a 32-bit integer.
    if (Alignment >= 1u << 31)
      return Error(AlignLoc, "alignment is too large");
  }

  if (parseToken(AsmToken::EndOfStatement,
                 "unexpected token in '.amdgpu_lds' directive"))
    return true;

  Symbol->redefineIfPossible();
  if (!Symbol->isUndefined())
    return Error(NameLoc, "invalid symbol redefinition");

  getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
  return false;
}

/// Dispatch AMDGPU-specific assembler directives to their handlers.
/// Returns true for directives this target does not handle.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // The accepted directive set depends on the HSA ABI version in use.
  if (isHsaAbiVersion3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // Directives accepted regardless of the HSA ABI version.
  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

/// Return true if \p RegNo (or any register aliasing it) is available on the
/// current subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // TTMP12..TTMP15 only exist as trap temps from GFX9 onwards.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9Plus();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  // In NSA mode, accept a bracketed list of registers: [reg, reg, ...].
  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    // Only wrap the list in bracket tokens when it holds more than one
    // register; a single bracketed register is left unwrapped.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}

/// Strip a known encoding suffix (_e64/_e32/_dpp/_sdwa) from \p Name and
/// record the corresponding forced-encoding state for the instruction.
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // GFX10+ MIMG instructions may use an NSA (bracketed register list)
    // address operand, which is the second operand after the mnemonic.
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
          (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
                                            "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Skip the rest of the statement so the parser can resynchronize.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

/// Parse "<Prefix>:<integer expr>" into \p IntVal; NoMatch if the prefix
/// (followed by a colon) is absent.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {

  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
}

/// Parse "<Prefix>:<integer expr>" and append it to \p Operands as an
/// immediate of type \p ImmTy, optionally post-processed by \p ConvertResult.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  // Note: a failed conversion reports an error but still pushes the operand.
  if (ConvertResult && !ConvertResult(Value)) {
    Error(S, "invalid " + StringRef(Prefix) + " value.");
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

/// Parse "<Prefix>:[b,b,...]" where each element is 0 or 1 (up to 4 of them)
/// and pack the bits, LSB first, into a single immediate operand.
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}

/// Parse a named boolean modifier: "<Name>" sets the bit, "no<Name>" clears
/// it; validates availability of r128/a16 on the current subtarget.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit;
  SMLoc S = getLoc();

  if (trySkipId(Name)) {
    Bit = 1;
  } else if (trySkipId("no", Name)) {
    Bit = 0;
  } else {
    return MatchOperand_NoMatch;
  }

  if (Name == "r128" && !hasMIMG_R128()) {
    Error(S, "r128 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }
  if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
    Error(S, "a16 modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  // On GFX9 the a16 modifier shares the r128 operand slot.
  if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
    ImmTy = AMDGPUOperand::ImmTyR128A16;

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

/// Parse one cache-policy modifier (glc/slc/dlc/scc or the no* variants),
/// validate availability, and merge it into the instruction's CPol operand.
OperandMatchResultTy
AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
  unsigned CPolOn = 0;
  unsigned CPolOff = 0;
  SMLoc S = getLoc();

  if (trySkipId("glc"))
    CPolOn = AMDGPU::CPol::GLC;
  else if (trySkipId("noglc"))
    CPolOff = AMDGPU::CPol::GLC;
  else if (trySkipId("slc"))
    CPolOn = AMDGPU::CPol::SLC;
  else if (trySkipId("noslc"))
    CPolOff = AMDGPU::CPol::SLC;
  else if (trySkipId("dlc"))
    CPolOn = AMDGPU::CPol::DLC;
  else if (trySkipId("nodlc"))
    CPolOff = AMDGPU::CPol::DLC;
  else if (trySkipId("scc"))
    CPolOn = AMDGPU::CPol::SCC;
  else if (trySkipId("noscc"))
    CPolOff = AMDGPU::CPol::SCC;
  else
    return MatchOperand_NoMatch;

  if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
    Error(S, "dlc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
    Error(S, "scc modifier is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  // CPolSeen accumulates the bits seen so far on this instruction so that
  // e.g. both "glc" and "noglc" cannot be given together.
  if (CPolSeen & (CPolOn | CPolOff)) {
    Error(S, "duplicate cache policy modifier");
    return MatchOperand_ParseFail;
  }

  CPolSeen |= (CPolOn | CPolOff);

  // If a CPol operand already exists, merge this modifier into it.
  for (unsigned I = 1; I != Operands.size(); ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (Op.isCPol()) {
      Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
      return MatchOperand_Success;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
                                              AMDGPUOperand::ImmTyCPol));

  return MatchOperand_Success;
}

// Append the immediate recorded in OptionalIdx for ImmT to Inst, or Default
// if that optional operand was not present in the source.
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

/// Parse "<Prefix>:<identifier>" into \p Value; \p StringLoc receives the
/// identifier's location.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
                                       StringRef &Value,
                                       SMLoc &StringLoc) {
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  StringLoc = getLoc();
  return parseId(Value, "expected an identifier") ? MatchOperand_Success
                                                  : MatchOperand_ParseFail;
}

//===----------------------------------------------------------------------===//
// MTBUF format
//===----------------------------------------------------------------------===//

// Parse "<Pref>:<int>" into Fmt, range-checking against MaxVal.
// Returns false on a hard parse/range error, true otherwise (including when
// the prefix is simply absent, in which case Fmt is left untouched).
bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
                                  int64_t MaxVal,
                                  int64_t &Fmt) {
  int64_t Val;
  SMLoc Loc = getLoc();

  auto Res = parseIntWithPrefix(Pref, Val);
  if (Res == MatchOperand_ParseFail)
    return false;
  if (Res == MatchOperand_NoMatch)
    return true;

  if (Val < 0 || Val > MaxVal) {
    Error(Loc, Twine("out of range ", StringRef(Pref)));
    return false;
  }

  Fmt = Val;
  return true;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ?
         NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}

/// Parse a gfx10+ unified format given numerically as "format:<n>".
OperandMatchResultTy
AMDGPUAsmParser::parseUfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Fmt = UFMT_UNDEF;

  if (!tryParseFmt("format", UFMT_MAX, Fmt))
    return MatchOperand_ParseFail;

  if (Fmt == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  Format = Fmt;
  return MatchOperand_Success;
}

/// Classify \p FormatStr as either a data format (stored to \p Dfmt) or a
/// numeric format (stored to \p Nfmt); reports an error if it is neither.
bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
                                    int64_t &Nfmt,
                                    StringRef FormatStr,
                                    SMLoc Loc) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  int64_t Format;

  Format = getDfmt(FormatStr);
  if (Format != DFMT_UNDEF) {
    Dfmt = Format;
    return true;
  }

  Format = getNfmt(FormatStr, getSTI());
  if (Format != NFMT_UNDEF) {
    Nfmt = Format;
    return true;
  }

  Error(Loc, "unsupported format");
  return false;
}

/// Parse a symbolic split format: one or two comma-separated format names
/// (one data format and/or one numeric format, in either order).
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    // After two names, exactly one dfmt and one nfmt must have been set;
    // a still-undefined slot means the same kind was given twice.
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  // GFX10+ encodes a single unified format; older targets encode the pair.
  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}

/// Parse a symbolic unified format name (gfx10+ only).
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
                                            SMLoc Loc,
                                            int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  auto Id = getUnifiedFormat(FormatStr);
  if (Id == UFMT_UNDEF)
    return MatchOperand_NoMatch;

  if (!isGFX10Plus()) {
    Error(Loc, "unified format is not supported on this GPU");
    return MatchOperand_ParseFail;
  }

  Format = Id;
  return MatchOperand_Success;
}

/// Parse a raw numeric format expression and range-check it for the
/// current subtarget.
OperandMatchResultTy
AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;
  SMLoc Loc = getLoc();

  if (!parseExpr(Format))
    return MatchOperand_ParseFail;
  if (!isValidFormatEncoding(Format, getSTI())) {
    Error(Loc, "out of range format");
    return MatchOperand_ParseFail;
  }

  return MatchOperand_Success;
}

/// Parse "format:" followed by either a bracketed symbolic format list or a
/// numeric format expression.
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return MatchOperand_ParseFail;

    // Try the unified (gfx10+) name first, then fall back to split names.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res == MatchOperand_NoMatch)
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (Res != MatchOperand_Success)
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return MatchOperand_ParseFail;

    return MatchOperand_Success;
  }

  return parseNumericFormat(Format);
}

/// Parse the MTBUF format operand. The format may appear either before or
/// after soffset, so a default-valued FORMAT operand is pushed first and
/// patched later if the format shows up after soffset.
OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    // The format may follow soffset; if found here, patch the placeholder
    // FORMAT operand pushed earlier (it sits just before soffset).
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// ds
//===----------------------------------------------------------------------===//

/// Build an MCInst for DS instructions that take offset0/offset1/gds
/// optional operands; appends the implicit m0 register operand.
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
                                    const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);

  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

/// Build an MCInst for general DS instructions. \p IsGdsHardcoded indicates
/// the opcode implies GDS (or a literal "gds" token was parsed), in which
/// case no explicit gds operand is appended.
void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                                bool IsGdsHardcoded) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "gds") {
      IsGdsHardcoded = true;
      continue;
    }

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // ds_swizzle_b32 interprets its offset field as a swizzle pattern.
  AMDGPUOperand::ImmTy OffsetType =
    (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
     Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
                                                      AMDGPUOperand::ImmTyOffset;

  addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);

  if (!IsGdsHardcoded) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
  }
  Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}

/// Build an MCInst for export (exp) instructions: collects the four source
/// registers (or "off" placeholders) and computes the enable mask.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // "off" sources become NoRegister placeholders.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  // In compressed mode only src0/src1 are meaningful; src1 takes the value
  // of what was parsed as src2, and the upper slots are cleared.
  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) !=
      OptionalIdx.end()) {
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Enable mask: one bit per live source (two bits per slot if compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}

//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//

// Merge CntVal into the packed waitcnt value IntVal using the supplied
// encode function; overflow is detected by a round-trip decode. On overflow,
// either saturate the field (Saturate) or report failure. Returns true on
// failure.
static bool
encodeCnt(
  const AMDGPU::IsaVersion ISA,
  int64_t &IntVal,
  int64_t CntVal,
  bool Saturate,
  unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
  unsigned (*decode)(const IsaVersion &Version, unsigned))
{
  bool Failed = false;

  IntVal = encode(ISA, IntVal, CntVal);
  if (CntVal != decode(ISA, IntVal)) {
    if (Saturate) {
      IntVal = encode(ISA, IntVal, -1);
    } else {
      Failed = true;
    }
  }
  return Failed;
}

/// Parse one "name(value)" waitcnt component (vmcnt/expcnt/lgkmcnt, each
/// with an optional _sat suffix) and fold it into \p IntVal.
/// Returns false on error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Components may be separated by '&' or ','; a trailing separator with
  // nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}

/// Parse the s_waitcnt operand: either a list of named counters or a plain
/// integer expression. Unspecified counters keep their all-ones default.
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = getLoc();

  if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
    while (!isToken(AsmToken::EndOfStatement)) {
      if (!parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    }
  } else {
    if (!parseExpr(Waitcnt))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

bool
AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

/// Parse the body of a hwreg(...) macro: a register (name or code) with
/// optional ",offset,width" parameters. The opening '(' has been consumed.
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace
    llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

/// Validate a parsed hwreg specification; emits a diagnostic and returns
/// false when any component is out of range or unsupported.
bool
AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
                               const OperandInfoTy &Offset,
                               const OperandInfoTy &Width) {

  using namespace llvm::AMDGPU::Hwreg;

  // Subtarget availability is only checked for symbolic register names.
  if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
    Error(HwReg.Loc,
          "specified hardware register is not supported on this GPU");
    return false;
  }
  if (!isValidHwreg(HwReg.Id)) {
    Error(HwReg.Loc,
          "invalid code of hardware register: only 6-bit values are legal");
    return false;
  }
  if (!isValidHwregOffset(Offset.Id)) {
    Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
    return false;
  }
  if (!isValidHwregWidth(Width.Id)) {
    Error(Width.Loc,
          "invalid bitfield width: only values from 1 to 32 are legal");
    return false;
  }
  return true;
}

/// Parse the s_setreg/s_getreg operand: either a hwreg(...) macro or a raw
/// 16-bit immediate encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("hwreg", AsmToken::LParen)) {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    OperandInfoTy Offset(OFFSET_DEFAULT_);
    OperandInfoTy Width(WIDTH_DEFAULT_);
    if (parseHwregBody(HwReg, Offset, Width) &&
        validateHwreg(HwReg, Offset, Width)) {
      ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a hwreg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are legal");
      return MatchOperand_ParseFail;
    }
  } else {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

//===----------------------------------------------------------------------===//
// sendmsg
//===----------------------------------------------------------------------===//

/// Parse the body of a sendmsg(...) macro: a message (name or code) with
/// optional operation and stream ids. The opening '(' has been consumed.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}

/// Validate a parsed sendmsg specification; emits a diagnostic and returns
/// false on any invalid component.
bool
AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
                                 const OperandInfoTy &Op,
                                 const OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Validation strictness depends on whether message is specified
  // in a symbolic or in a numeric form. In the latter case
  // only encoding possibility is checked.
  bool Strict = Msg.IsSymbolic;

  if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
    Error(Msg.Loc, "invalid message id");
    return false;
  }
  if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
    if (Op.IsDefined) {
      Error(Op.Loc, "message does not support operations");
    } else {
      Error(Msg.Loc, "missing message operation");
    }
    return false;
  }
  if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
    Error(Op.Loc, "invalid operation id");
    return false;
  }
  if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
    Error(Stream.Loc, "message operation does not support streams");
    return false;
  }
  if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
    Error(Stream.Loc, "invalid message stream id");
    return false;
  }
  return true;
}

/// Parse the s_sendmsg operand: either a sendmsg(...) macro or a raw 16-bit
/// immediate encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t ImmVal = 0;
  SMLoc Loc = getLoc();

  if (trySkipId("sendmsg", AsmToken::LParen)) {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Op(OP_NONE_);
    OperandInfoTy Stream(STREAM_ID_NONE_);
    if (parseSendMsgBody(Msg, Op, Stream) &&
        validateSendMsg(Msg, Op, Stream)) {
      ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
    } else {
      return MatchOperand_ParseFail;
    }
  } else if (parseExpr(ImmVal, "a sendmsg macro")) {
    if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
      Error(Loc, "invalid immediate: only 16-bit values are
legal"); 6200 return MatchOperand_ParseFail; 6201 } 6202 } else { 6203 return MatchOperand_ParseFail; 6204 } 6205 6206 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 6207 return MatchOperand_Success; 6208 } 6209 6210 bool AMDGPUOperand::isSendMsg() const { 6211 return isImmTy(ImmTySendMsg); 6212 } 6213 6214 //===----------------------------------------------------------------------===// 6215 // v_interp 6216 //===----------------------------------------------------------------------===// 6217 6218 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 6219 StringRef Str; 6220 SMLoc S = getLoc(); 6221 6222 if (!parseId(Str)) 6223 return MatchOperand_NoMatch; 6224 6225 int Slot = StringSwitch<int>(Str) 6226 .Case("p10", 0) 6227 .Case("p20", 1) 6228 .Case("p0", 2) 6229 .Default(-1); 6230 6231 if (Slot == -1) { 6232 Error(S, "invalid interpolation slot"); 6233 return MatchOperand_ParseFail; 6234 } 6235 6236 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 6237 AMDGPUOperand::ImmTyInterpSlot)); 6238 return MatchOperand_Success; 6239 } 6240 6241 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6242 StringRef Str; 6243 SMLoc S = getLoc(); 6244 6245 if (!parseId(Str)) 6246 return MatchOperand_NoMatch; 6247 6248 if (!Str.startswith("attr")) { 6249 Error(S, "invalid interpolation attribute"); 6250 return MatchOperand_ParseFail; 6251 } 6252 6253 StringRef Chan = Str.take_back(2); 6254 int AttrChan = StringSwitch<int>(Chan) 6255 .Case(".x", 0) 6256 .Case(".y", 1) 6257 .Case(".z", 2) 6258 .Case(".w", 3) 6259 .Default(-1); 6260 if (AttrChan == -1) { 6261 Error(S, "invalid or missing interpolation attribute channel"); 6262 return MatchOperand_ParseFail; 6263 } 6264 6265 Str = Str.drop_back(2).drop_front(4); 6266 6267 uint8_t Attr; 6268 if (Str.getAsInteger(10, Attr)) { 6269 Error(S, "invalid or missing interpolation attribute number"); 6270 return 
MatchOperand_ParseFail; 6271 } 6272 6273 if (Attr > 63) { 6274 Error(S, "out of bounds interpolation attribute number"); 6275 return MatchOperand_ParseFail; 6276 } 6277 6278 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6279 6280 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6281 AMDGPUOperand::ImmTyInterpAttr)); 6282 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6283 AMDGPUOperand::ImmTyAttrChan)); 6284 return MatchOperand_Success; 6285 } 6286 6287 //===----------------------------------------------------------------------===// 6288 // exp 6289 //===----------------------------------------------------------------------===// 6290 6291 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6292 using namespace llvm::AMDGPU::Exp; 6293 6294 StringRef Str; 6295 SMLoc S = getLoc(); 6296 6297 if (!parseId(Str)) 6298 return MatchOperand_NoMatch; 6299 6300 unsigned Id = getTgtId(Str); 6301 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6302 Error(S, (Id == ET_INVALID) ? 
6303 "invalid exp target" : 6304 "exp target is not supported on this GPU"); 6305 return MatchOperand_ParseFail; 6306 } 6307 6308 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6309 AMDGPUOperand::ImmTyExpTgt)); 6310 return MatchOperand_Success; 6311 } 6312 6313 //===----------------------------------------------------------------------===// 6314 // parser helpers 6315 //===----------------------------------------------------------------------===// 6316 6317 bool 6318 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6319 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6320 } 6321 6322 bool 6323 AMDGPUAsmParser::isId(const StringRef Id) const { 6324 return isId(getToken(), Id); 6325 } 6326 6327 bool 6328 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6329 return getTokenKind() == Kind; 6330 } 6331 6332 bool 6333 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6334 if (isId(Id)) { 6335 lex(); 6336 return true; 6337 } 6338 return false; 6339 } 6340 6341 bool 6342 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6343 if (isToken(AsmToken::Identifier)) { 6344 StringRef Tok = getTokenStr(); 6345 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6346 lex(); 6347 return true; 6348 } 6349 } 6350 return false; 6351 } 6352 6353 bool 6354 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6355 if (isId(Id) && peekToken().is(Kind)) { 6356 lex(); 6357 lex(); 6358 return true; 6359 } 6360 return false; 6361 } 6362 6363 bool 6364 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6365 if (isToken(Kind)) { 6366 lex(); 6367 return true; 6368 } 6369 return false; 6370 } 6371 6372 bool 6373 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6374 const StringRef ErrMsg) { 6375 if (!trySkipToken(Kind)) { 6376 Error(getLoc(), ErrMsg); 6377 return false; 6378 } 6379 return true; 6380 } 6381 6382 bool 6383 
AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6384 SMLoc S = getLoc(); 6385 6386 const MCExpr *Expr; 6387 if (Parser.parseExpression(Expr)) 6388 return false; 6389 6390 if (Expr->evaluateAsAbsolute(Imm)) 6391 return true; 6392 6393 if (Expected.empty()) { 6394 Error(S, "expected absolute expression"); 6395 } else { 6396 Error(S, Twine("expected ", Expected) + 6397 Twine(" or an absolute expression")); 6398 } 6399 return false; 6400 } 6401 6402 bool 6403 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6404 SMLoc S = getLoc(); 6405 6406 const MCExpr *Expr; 6407 if (Parser.parseExpression(Expr)) 6408 return false; 6409 6410 int64_t IntVal; 6411 if (Expr->evaluateAsAbsolute(IntVal)) { 6412 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6413 } else { 6414 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6415 } 6416 return true; 6417 } 6418 6419 bool 6420 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6421 if (isToken(AsmToken::String)) { 6422 Val = getToken().getStringContents(); 6423 lex(); 6424 return true; 6425 } else { 6426 Error(getLoc(), ErrMsg); 6427 return false; 6428 } 6429 } 6430 6431 bool 6432 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6433 if (isToken(AsmToken::Identifier)) { 6434 Val = getTokenStr(); 6435 lex(); 6436 return true; 6437 } else { 6438 if (!ErrMsg.empty()) 6439 Error(getLoc(), ErrMsg); 6440 return false; 6441 } 6442 } 6443 6444 AsmToken 6445 AMDGPUAsmParser::getToken() const { 6446 return Parser.getTok(); 6447 } 6448 6449 AsmToken 6450 AMDGPUAsmParser::peekToken() { 6451 return isToken(AsmToken::EndOfStatement) ? 
getToken() : getLexer().peekTok(); 6452 } 6453 6454 void 6455 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6456 auto TokCount = getLexer().peekTokens(Tokens); 6457 6458 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6459 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6460 } 6461 6462 AsmToken::TokenKind 6463 AMDGPUAsmParser::getTokenKind() const { 6464 return getLexer().getKind(); 6465 } 6466 6467 SMLoc 6468 AMDGPUAsmParser::getLoc() const { 6469 return getToken().getLoc(); 6470 } 6471 6472 StringRef 6473 AMDGPUAsmParser::getTokenStr() const { 6474 return getToken().getString(); 6475 } 6476 6477 void 6478 AMDGPUAsmParser::lex() { 6479 Parser.Lex(); 6480 } 6481 6482 SMLoc 6483 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6484 const OperandVector &Operands) const { 6485 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6486 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6487 if (Test(Op)) 6488 return Op.getStartLoc(); 6489 } 6490 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6491 } 6492 6493 SMLoc 6494 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6495 const OperandVector &Operands) const { 6496 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6497 return getOperandLoc(Test, Operands); 6498 } 6499 6500 SMLoc 6501 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6502 const OperandVector &Operands) const { 6503 auto Test = [=](const AMDGPUOperand& Op) { 6504 return Op.isRegKind() && Op.getReg() == Reg; 6505 }; 6506 return getOperandLoc(Test, Operands); 6507 } 6508 6509 SMLoc 6510 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6511 auto Test = [](const AMDGPUOperand& Op) { 6512 return Op.IsImmKindLiteral() || Op.isExpr(); 6513 }; 6514 return getOperandLoc(Test, Operands); 6515 } 6516 6517 SMLoc 6518 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6519 auto Test = [](const AMDGPUOperand& Op) { 6520 return Op.isImmKindConst(); 6521 }; 
return getOperandLoc(Test, Operands);
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack and/or/xor lane masks into the BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse ",<expr>" and check the value lies in [MinVal, MaxVal]; on success
// Loc is set to the operand's location.
bool
AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
                                     const unsigned MinVal,
                                     const unsigned MaxVal,
                                     const StringRef ErrMsg,
                                     SMLoc &Loc) {
  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }
  Loc = getLoc();
  if (!parseExpr(Op)) {
    return false;
  }
  if (Op < MinVal || Op > MaxVal) {
    Error(Loc, ErrMsg);
    return false;
  }

  return true;
}

// Parse OpNum comma-prefixed operands, each range-checked as above.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  SMLoc Loc;
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
      return false;
  }

  return true;
}

// swizzle(QUAD_PERM, lane0, lane1, lane2, lane3)
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    // Pack each 2-bit lane id into its slot.
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group_size, lane_id) - encoded as a bitmask perm which
// forces the low log2(group_size) bits to lane_id.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperand(LaneIdx,
                          0, GroupSize - 1,
                          "lane id must be in the interval [0,group size - 1]",
                          Loc)) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group_size) - reverse lanes within each group by xoring
// the in-group lane bits.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           2, 32,
                           "group size must be in the interval [2,32]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group_size) - swap adjacent groups by xoring the group bit.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc Loc;
  int64_t GroupSize;

  if (!parseSwizzleOperand(GroupSize,
                           1, 16,
                           "group size must be in the interval [1,16]",
                           Loc)) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(Loc, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask") where mask is 5 characters over {0,1,p,i}:
// '0' force bit to 0, '1' force to 1, 'p' preserve, 'i' invert.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  // Decode the mask MSB-first into and/or/xor lane masks.
  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// A raw 16-bit swizzle offset (used when no swizzle(...) macro is given).
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = getLoc();

  if (!parseExpr(Imm, "a swizzle macro")) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Dispatch on the swizzle mode keyword inside "swizzle(...)".
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse "offset:" followed by either a swizzle(...) macro or a raw offset.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok =
false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the interior of gpr_idx(...): a comma-separated list of distinct
// index modes (or empty for OFF). Returns the mode bitmask, or UNDEF on
// a parse error.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = getLoc();

    // Try each symbolic mode name in turn.
    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      return UNDEF;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      return UNDEF;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      return UNDEF;
  }

  return Imm;
}

// Parse a gpr_idx(...) macro or a raw 4-bit immediate mode mask.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  int64_t Imm = 0;
  SMLoc S = getLoc();

  if (trySkipId("gpr_idx", AsmToken::LParen)) {
    Imm = parseGPRIdxMacro();
    if (Imm == UNDEF)
      return MatchOperand_ParseFail;
  } else {
    if (getParser().parseAbsoluteExpression(Imm))
      return MatchOperand_ParseFail;
    if (Imm < 0 || !isUInt<4>(Imm)) {
      Error(S, "invalid immediate: only 4-bit values are legal");
      return MatchOperand_ParseFail;
    }
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isGPRIdxMode() const {
  return isImmTy(ImmTyGprIdxMode);
}

//===----------------------------------------------------------------------===//
// sopp branch targets
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {

  // Make sure we are not parsing something
  // that looks like a label or an expression but is not.
  // This will improve error messages.
if (isRegister() || isModifier())
    return MatchOperand_NoMatch;

  if (!parseExpr(Operands))
    return MatchOperand_ParseFail;

  AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
  assert(Opr.isImm() || Opr.isExpr());
  SMLoc Loc = Opr.getStartLoc();

  // Currently we do not support arbitrary expressions as branch targets.
  // Only labels and absolute expressions are accepted.
  if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
    Error(Loc, "expected an absolute expression or a label");
  } else if (Opr.isImm() && !Opr.isS16Imm()) {
    Error(Loc, "expected a 16-bit signed jump offset");
  }

  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// Boolean holding registers
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
  return parseReg(Operands);
}

//===----------------------------------------------------------------------===//
// mubuf
//===----------------------------------------------------------------------===//

// Default cache policy operand (all policy bits clear).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
}

// Default cache policy operand with GLC set (atomic-with-return forms).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol_GLC1() const {
  return AMDGPUOperand::CreateImm(this, CPol::GLC, SMLoc(),
                                  AMDGPUOperand::ImmTyCPol);
}

// Convert parsed MUBUF operands into MCInst operands, handling atomic
// return/no-return opcode selection and the lds-modifier quirk.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // An atomic returns its result iff the GLC bit is set in the cache
    // policy operand.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly updated) opcode.
    IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                     SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Emit optional immediates in the order the opcode expects them.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

// Convert parsed MTBUF operands into MCInst operands.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Emit optional immediates in the order the opcode expects them.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

// Convert parsed MIMG operands into MCInst operands. For atomics the dst
// register is also added as a tied source.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  // Emit optional immediates; which ones exist depends on the generation.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

// Convert parsed SMEM atomic operands, selecting the no-return opcode when
// GLC is not set and tying the dst register as a source for returning forms.
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  // The GLC bit in the cache policy operand selects the returning form.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  // Re-derive the flag from the (possibly updated) opcode.
  IsAtomicReturn = MII.get(Inst.getOpcode()).TSFlags &
                   SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Tie the dst register as a source for the returning form.
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string. There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Only add a default offset if the opcode has one and it was not parsed.
  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}

// IMAGE_BVH intersect-ray: only register operands are emitted, plus a
// hard-coded a16 immediate.
void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
                                      const OperandVector &Operands) {
  for (unsigned I = 1; I < Operands.size(); ++I) {
    auto &Operand = (AMDGPUOperand &)*Operands[I];
    if (Operand.isReg())
      Operand.addRegOperands(Inst, 1);
  }

  Inst.addOperand(MCOperand::createImm(1)); // a16
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMEMOffset() const {
  return isImm(); // Offset range is checked later by validator.
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
7177 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7178 } 7179 7180 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7181 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7182 } 7183 7184 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7185 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7186 } 7187 7188 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7189 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7190 } 7191 7192 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7193 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7194 } 7195 7196 //===----------------------------------------------------------------------===// 7197 // vop3 7198 //===----------------------------------------------------------------------===// 7199 7200 static bool ConvertOmodMul(int64_t &Mul) { 7201 if (Mul != 1 && Mul != 2 && Mul != 4) 7202 return false; 7203 7204 Mul >>= 1; 7205 return true; 7206 } 7207 7208 static bool ConvertOmodDiv(int64_t &Div) { 7209 if (Div == 1) { 7210 Div = 0; 7211 return true; 7212 } 7213 7214 if (Div == 2) { 7215 Div = 3; 7216 return true; 7217 } 7218 7219 return false; 7220 } 7221 7222 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7223 // This is intentional and ensures compatibility with sp3. 7224 // See bug 35397 for details. 7225 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7226 if (BoundCtrl == 0 || BoundCtrl == 1) { 7227 BoundCtrl = 1; 7228 return true; 7229 } 7230 return false; 7231 } 7232 7233 // Note: the order in this table matches the order of operands in AsmString. 
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  // Cache policy has no single keyword; parseCPol handles its tokens.
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16, true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  // d16 appears twice because it occupies different positions in different
  // instruction groups (table order must match AsmString operand order).
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};

// Parse one optional operand, then speculatively keep parsing so that
// hardcoded mandatory operands following optional ones are not mistaken for
// the end of the operand list.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
    if (res != MatchOperand_Success ||
        isToken(AsmToken::EndOfStatement))
      break;

    trySkipToken(AsmToken::Comma);
    res = parseOptionalOpr(Operands);
  }

  return res;
}

// Try each entry of AMDGPUOptionalOperandTable in order and dispatch to the
// specialized parser for the operand kinds that need one.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
      res = parseDim(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
      res = parseCPol(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

// omod is written either as mul:N or div:N; both map onto the same
// ImmTyOModSI operand via their respective conversion callbacks.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = getTokenStr();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

// VOP3 with op_sel: after the common VOP3P conversion, move the dst op_sel
// bit (the bit just past the last source) into src0_modifiers.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count the source operands this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

// True if operand OpNum of Desc is an input-modifiers operand that precedes a
// regular (untied) register source.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

// Convert parsed operands of a VOP3 interpolation instruction (v_interp_*)
// into MCInst operands, appending the optional high/clamp/omod immediates.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

// Core VOP3 conversion: defs first, then sources (with or without source
// modifiers), then the optional clamp/omod immediates.  The caller's
// OptionalIdx receives positions of any optional modifiers seen.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

// VOP3 conversion when the caller does not need the optional-operand map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

// Packed-math VOP3P conversion: runs the generic VOP3 conversion, then folds
// op_sel / op_sel_hi / neg_lo / neg_hi immediates into the per-source
// modifier operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination; replicate the dst operand.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed default op_sel_hi is all-ones (-1); unpacked default is 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Distribute bit J of each mask into the J-th source's modifier operand.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
7587 7588 bool AMDGPUOperand::isDPP8() const { 7589 return isImmTy(ImmTyDPP8); 7590 } 7591 7592 bool AMDGPUOperand::isDPPCtrl() const { 7593 using namespace AMDGPU::DPP; 7594 7595 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7596 if (result) { 7597 int64_t Imm = getImm(); 7598 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7599 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7600 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7601 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7602 (Imm == DppCtrl::WAVE_SHL1) || 7603 (Imm == DppCtrl::WAVE_ROL1) || 7604 (Imm == DppCtrl::WAVE_SHR1) || 7605 (Imm == DppCtrl::WAVE_ROR1) || 7606 (Imm == DppCtrl::ROW_MIRROR) || 7607 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7608 (Imm == DppCtrl::BCAST15) || 7609 (Imm == DppCtrl::BCAST31) || 7610 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7611 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7612 } 7613 return false; 7614 } 7615 7616 //===----------------------------------------------------------------------===// 7617 // mAI 7618 //===----------------------------------------------------------------------===// 7619 7620 bool AMDGPUOperand::isBLGP() const { 7621 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7622 } 7623 7624 bool AMDGPUOperand::isCBSZ() const { 7625 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7626 } 7627 7628 bool AMDGPUOperand::isABID() const { 7629 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7630 } 7631 7632 bool AMDGPUOperand::isS16Imm() const { 7633 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7634 } 7635 7636 bool AMDGPUOperand::isU16Imm() const { 7637 return isImm() && isUInt<16>(getImm()); 7638 } 7639 7640 //===----------------------------------------------------------------------===// 7641 // dim 7642 
//===----------------------------------------------------------------------===//

// Parse the identifier part of "dim:<id>".  The leading digit of suffixes
// like "1D" is tokenized as an integer, so glue an adjacent integer token and
// identifier back together before the lookup.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // Reject if there was whitespace between the integer and the identifier.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  StringRef DimId = Token;
  // Also accept the SP3-style "SQ_RSRC_IMG_..." spelling.
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}

// Parse the optional "dim:<id>" MIMG operand (GFX10+ only).
OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();

  if (!trySkipId("dim", AsmToken::Colon))
    return MatchOperand_NoMatch;

  unsigned Encoding;
  SMLoc Loc = getLoc();
  if (!parseDimId(Encoding)) {
    Error(Loc, "invalid dim value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

// Parse "dpp8:[s0,...,s7]" into a single immediate with eight packed 3-bit
// lane selectors (GFX10+ only).
OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = getLoc();

  if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return MatchOperand_ParseFail;

  for (size_t i = 0; i < 8; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;

    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i]) {
      Error(Loc, "expected a 3-bit value");
      return MatchOperand_ParseFail;
    }
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return MatchOperand_ParseFail;

  // Pack selector i into bits [3*i+2 : 3*i].
  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}

// Decide whether a dpp_ctrl keyword is valid for the current subtarget and
// the operands parsed so far.
bool
AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
                                    const OperandVector &Operands) {
  if (Ctrl == "row_newbcast")
    return isGFX90A();

  // DPP64 is supported for row_newbcast only.
  const MCRegisterInfo *MRI = getMRI();
  if (Operands.size() > 2 && Operands[1]->isReg() &&
      MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1))
    return false;

  if (Ctrl == "row_share" ||
      Ctrl == "row_xmask")
    return isGFX10Plus();

  if (Ctrl == "wave_shl" ||
      Ctrl == "wave_shr" ||
      Ctrl == "wave_rol" ||
      Ctrl == "wave_ror" ||
      Ctrl == "row_bcast")
    return isVI() || isGFX9();

  return Ctrl == "row_mirror" ||
         Ctrl == "row_half_mirror" ||
         Ctrl == "quad_perm" ||
         Ctrl == "row_shl" ||
         Ctrl == "row_shr" ||
         Ctrl == "row_ror";
}

// Parse the bracketed list of "quad_perm:[a,b,c,d]" into four packed 2-bit
// fields.  Returns the packed value or -1 on parse failure.
int64_t
AMDGPUAsmParser::parseDPPCtrlPerm() {
  // quad_perm:[%d,%d,%d,%d]

  if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
    return -1;

  int64_t Val = 0;
  for (int i = 0; i < 4; ++i) {
    if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
      return -1;

    int64_t Temp;
    SMLoc Loc = getLoc();
    if (getParser().parseAbsoluteExpression(Temp))
      return -1;
    if (Temp < 0 || Temp > 3) {
      Error(Loc, "expected a 2-bit value");
      return -1;
    }

    Val += (Temp << i * 2);
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return -1;

  return Val;
}

// Parse the numeric argument of "<ctrl>:%d", validate it against the allowed
// range for that control, and fold it into the dpp_ctrl encoding.  Returns
// the encoded value or -1 on failure.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  struct DppCtrlCheck {
    int64_t Ctrl;     // base encoding
    int Lo;           // smallest accepted operand value
    int Hi;           // largest accepted operand value
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,         1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,         1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,         1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,         1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,          1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,          1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,          1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST,   0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST,   0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // row_bcast is the only control not in the table; it accepts 15 or 31.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Single-valued controls encode as the base; ranged ones OR in the value.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

// Parse a complete dpp_ctrl operand (keyword plus optional argument list).
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Defaults for omitted DPP operands (masks default to all lanes enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

// Convert parsed operands of a DPP (or DPP8) instruction into MCInst
// operands, replicating tied operands and appending the trailing DPP control
// immediates.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is encoded in the dpp8 immediate; remember it for later.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

// Parse an SDWA byte/word selector such as "dst_sel:BYTE_0".
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix(Prefix, Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid " + Twine(Prefix) + " value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

// Parse "dst_unused:UNUSED_*".
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  SMLoc StringLoc;
  res = parseStringWithPrefix("dst_unused", Value, StringLoc);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);

  if (Int == 0xffffffff) {
    Error(StringLoc, "invalid dst_unused value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin per-encoding wrappers over cvtSDWA.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

// Convert parsed SDWA operands into MCInst operands, skipping the implicit
// vcc tokens where requested and appending the encoding-specific optional
// sel/unused/clamp/omod immediates.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

// Defaults for omitted MFMA modifiers.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand.
This method checks if we are given immediate operand but expect to 8185 // get corresponding token. 8186 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 8187 switch (Kind) { 8188 case MCK_addr64: 8189 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 8190 case MCK_gds: 8191 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 8192 case MCK_lds: 8193 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 8194 case MCK_idxen: 8195 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 8196 case MCK_offen: 8197 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 8198 case MCK_SSrcB32: 8199 // When operands have expression values, they will return true for isToken, 8200 // because it is not possible to distinguish between a token and an 8201 // expression at parse time. MatchInstructionImpl() will always try to 8202 // match an operand as a token, when isToken returns true, and when the 8203 // name of the expression is not a valid token, the match will fail, 8204 // so we need to handle it here. 8205 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 8206 case MCK_SSrcF32: 8207 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 8208 case MCK_SoppBrTarget: 8209 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 8210 case MCK_VReg32OrOff: 8211 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 8212 case MCK_InterpSlot: 8213 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 8214 case MCK_Attr: 8215 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 8216 case MCK_AttrChan: 8217 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 8218 case MCK_ImmSMEMOffset: 8219 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 8220 case MCK_SReg_64: 8221 case MCK_SReg_64_XEXEC: 8222 // Null is defined as a 32-bit register but 8223 // it should also be enabled with 64-bit operands. 
8224 // The following code enables it for SReg_64 operands 8225 // used as source and destination. Remaining source 8226 // operands are handled in isInlinableImm. 8227 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 8228 default: 8229 return Match_InvalidOperand; 8230 } 8231 } 8232 8233 //===----------------------------------------------------------------------===// 8234 // endpgm 8235 //===----------------------------------------------------------------------===// 8236 8237 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 8238 SMLoc S = getLoc(); 8239 int64_t Imm = 0; 8240 8241 if (!parseExpr(Imm)) { 8242 // The operand is optional, if not present default to 0 8243 Imm = 0; 8244 } 8245 8246 if (!isUInt<16>(Imm)) { 8247 Error(S, "expected a 16-bit value"); 8248 return MatchOperand_ParseFail; 8249 } 8250 8251 Operands.push_back( 8252 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 8253 return MatchOperand_Success; 8254 } 8255 8256 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 8257