1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDKernelCodeT.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 12 #include "SIDefines.h" 13 #include "SIInstrInfo.h" 14 #include "SIRegisterInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/SmallBitVector.h" 21 #include "llvm/ADT/StringSet.h" 22 #include "llvm/ADT/Twine.h" 23 #include "llvm/MC/MCAsmInfo.h" 24 #include "llvm/MC/MCContext.h" 25 #include "llvm/MC/MCExpr.h" 26 #include "llvm/MC/MCInst.h" 27 #include "llvm/MC/MCParser/MCAsmParser.h" 28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 29 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 30 #include "llvm/MC/MCSymbol.h" 31 #include "llvm/Support/AMDGPUMetadata.h" 32 #include "llvm/Support/AMDHSAKernelDescriptor.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/MachineValueType.h" 35 #include "llvm/Support/TargetParser.h" 36 #include "llvm/Support/TargetRegistry.h" 37 38 using namespace llvm; 39 using namespace llvm::AMDGPU; 40 using namespace llvm::amdhsa; 41 42 namespace { 43 44 class AMDGPUAsmParser; 45 46 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 47 48 //===----------------------------------------------------------------------===// 49 // Operand 50 //===----------------------------------------------------------------------===// 51 52 class AMDGPUOperand : public MCParsedAsmOperand { 53 enum KindTy { 54 Token, 55 Immediate, 56 Register, 57 Expression 58 } Kind; 59 60 SMLoc StartLoc, EndLoc; 61 const AMDGPUAsmParser *AsmParser; 62 63 public: 64 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 65 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 66 67 using Ptr = std::unique_ptr<AMDGPUOperand>; 68 69 struct Modifiers { 70 bool Abs = false; 71 bool Neg = false; 72 bool Sext = false; 73 74 bool hasFPModifiers() const { return Abs || Neg; } 75 bool hasIntModifiers() const { return Sext; } 76 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 77 78 int64_t getFPModifiersOperand() const { 79 int64_t Operand = 0; 80 Operand |= Abs ? SISrcMods::ABS : 0u; 81 Operand |= Neg ? SISrcMods::NEG : 0u; 82 return Operand; 83 } 84 85 int64_t getIntModifiersOperand() const { 86 int64_t Operand = 0; 87 Operand |= Sext ? 
SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTySCCB,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
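    // For example, an operand such as 'gds' that has already been lexed as an
    // identifier may reach us as a MCSymbolRefExpr; isToken() still accepts
    // it, and getToken() below recovers the string "gds" from the symbol name.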
207 return isSymbolRefExpr(); 208 } 209 210 bool isSymbolRefExpr() const { 211 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 212 } 213 214 bool isImm() const override { 215 return Kind == Immediate; 216 } 217 218 void setImmKindNone() const { 219 assert(isImm()); 220 Imm.Kind = ImmKindTyNone; 221 } 222 223 void setImmKindLiteral() const { 224 assert(isImm()); 225 Imm.Kind = ImmKindTyLiteral; 226 } 227 228 void setImmKindConst() const { 229 assert(isImm()); 230 Imm.Kind = ImmKindTyConst; 231 } 232 233 bool IsImmKindLiteral() const { 234 return isImm() && Imm.Kind == ImmKindTyLiteral; 235 } 236 237 bool isImmKindConst() const { 238 return isImm() && Imm.Kind == ImmKindTyConst; 239 } 240 241 bool isInlinableImm(MVT type) const; 242 bool isLiteralImm(MVT type) const; 243 244 bool isRegKind() const { 245 return Kind == Register; 246 } 247 248 bool isReg() const override { 249 return isRegKind() && !hasModifiers(); 250 } 251 252 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 253 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 254 } 255 256 bool isRegOrImmWithInt16InputMods() const { 257 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 258 } 259 260 bool isRegOrImmWithInt32InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 262 } 263 264 bool isRegOrImmWithInt64InputMods() const { 265 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 266 } 267 268 bool isRegOrImmWithFP16InputMods() const { 269 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 270 } 271 272 bool isRegOrImmWithFP32InputMods() const { 273 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 274 } 275 276 bool isRegOrImmWithFP64InputMods() const { 277 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 278 } 279 280 bool isVReg() const { 281 return isRegClass(AMDGPU::VGPR_32RegClassID) || 282 isRegClass(AMDGPU::VReg_64RegClassID) || 283 isRegClass(AMDGPU::VReg_96RegClassID) || 284 isRegClass(AMDGPU::VReg_128RegClassID) || 285 isRegClass(AMDGPU::VReg_160RegClassID) || 286 isRegClass(AMDGPU::VReg_192RegClassID) || 287 isRegClass(AMDGPU::VReg_256RegClassID) || 288 isRegClass(AMDGPU::VReg_512RegClassID) || 289 isRegClass(AMDGPU::VReg_1024RegClassID); 290 } 291 292 bool isVReg32() const { 293 return isRegClass(AMDGPU::VGPR_32RegClassID); 294 } 295 296 bool isVReg32OrOff() const { 297 return isOff() || isVReg32(); 298 } 299 300 bool isNull() const { 301 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 302 } 303 304 bool isVRegWithInputMods() const; 305 306 bool isSDWAOperand(MVT type) const; 307 bool isSDWAFP16Operand() const; 308 bool isSDWAFP32Operand() const; 309 bool isSDWAInt16Operand() const; 310 bool isSDWAInt32Operand() const; 311 312 bool isImmTy(ImmTy ImmT) const { 313 return isImm() && Imm.Type == ImmT; 314 } 315 316 bool isImmModifier() const { 317 return isImm() && Imm.Type != ImmTyNone; 318 } 319 320 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 321 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 322 bool isDMask() const { return isImmTy(ImmTyDMask); } 323 bool isDim() const { return isImmTy(ImmTyDim); } 324 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 325 bool isDA() const { return isImmTy(ImmTyDA); } 326 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 327 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 328 bool isLWE() const { return isImmTy(ImmTyLWE); } 329 bool isOff() const { return isImmTy(ImmTyOff); } 330 bool 
isExpTgt() const { return isImmTy(ImmTyExpTgt); } 331 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 332 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 333 bool isOffen() const { return isImmTy(ImmTyOffen); } 334 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 335 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 336 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 337 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 338 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 339 340 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 341 bool isGDS() const { return isImmTy(ImmTyGDS); } 342 bool isLDS() const { return isImmTy(ImmTyLDS); } 343 bool isDLC() const { return isImmTy(ImmTyDLC); } 344 bool isSCCB() const { return isImmTy(ImmTySCCB); } 345 bool isGLC() const { return isImmTy(ImmTyGLC); } 346 // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced 347 // value of the GLC operand. 348 bool isGLC_1() const { return isImmTy(ImmTyGLC); } 349 bool isSLC() const { return isImmTy(ImmTySLC); } 350 bool isSWZ() const { return isImmTy(ImmTySWZ); } 351 bool isTFE() const { return isImmTy(ImmTyTFE); } 352 bool isD16() const { return isImmTy(ImmTyD16); } 353 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 354 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 355 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 356 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 357 bool isFI() const { return isImmTy(ImmTyDppFi); } 358 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 359 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 360 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 361 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 362 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 363 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 364 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 365 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 366 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 367 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 368 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 369 bool isHigh() const { return isImmTy(ImmTyHigh); } 370 371 bool isMod() const { 372 return isClampSI() || isOModSI(); 373 } 374 375 bool isRegOrImm() const { 376 return isReg() || isImm(); 377 } 378 379 bool isRegClass(unsigned RCID) const; 380 381 bool isInlineValue() const; 382 383 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 384 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 385 } 386 387 bool isSCSrcB16() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 389 } 390 391 bool isSCSrcV2B16() const { 392 return isSCSrcB16(); 393 } 394 395 bool isSCSrcB32() const { 396 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 397 } 398 399 bool isSCSrcB64() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 401 } 402 403 bool isBoolReg() const; 404 405 bool isSCSrcF16() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 407 } 408 409 bool isSCSrcV2F16() const { 410 return isSCSrcF16(); 411 } 412 413 bool isSCSrcF32() const { 414 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 415 } 416 417 bool isSCSrcF64() const { 418 
return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 419 } 420 421 bool isSSrcB32() const { 422 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 423 } 424 425 bool isSSrcB16() const { 426 return isSCSrcB16() || isLiteralImm(MVT::i16); 427 } 428 429 bool isSSrcV2B16() const { 430 llvm_unreachable("cannot happen"); 431 return isSSrcB16(); 432 } 433 434 bool isSSrcB64() const { 435 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 436 // See isVSrc64(). 437 return isSCSrcB64() || isLiteralImm(MVT::i64); 438 } 439 440 bool isSSrcF32() const { 441 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 442 } 443 444 bool isSSrcF64() const { 445 return isSCSrcB64() || isLiteralImm(MVT::f64); 446 } 447 448 bool isSSrcF16() const { 449 return isSCSrcB16() || isLiteralImm(MVT::f16); 450 } 451 452 bool isSSrcV2F16() const { 453 llvm_unreachable("cannot happen"); 454 return isSSrcF16(); 455 } 456 457 bool isSSrcV2FP32() const { 458 llvm_unreachable("cannot happen"); 459 return isSSrcF32(); 460 } 461 462 bool isSCSrcV2FP32() const { 463 llvm_unreachable("cannot happen"); 464 return isSCSrcF32(); 465 } 466 467 bool isSSrcV2INT32() const { 468 llvm_unreachable("cannot happen"); 469 return isSSrcB32(); 470 } 471 472 bool isSCSrcV2INT32() const { 473 llvm_unreachable("cannot happen"); 474 return isSCSrcB32(); 475 } 476 477 bool isSSrcOrLdsB32() const { 478 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 479 isLiteralImm(MVT::i32) || isExpr(); 480 } 481 482 bool isVCSrcB32() const { 483 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 484 } 485 486 bool isVCSrcB64() const { 487 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 488 } 489 490 bool isVCSrcB16() const { 491 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 492 } 493 494 bool isVCSrcV2B16() const { 495 return isVCSrcB16(); 496 } 497 498 bool isVCSrcF32() const { 499 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 500 } 501 502 bool isVCSrcF64() const { 503 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 504 } 505 506 bool isVCSrcF16() const { 507 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 508 } 509 510 bool isVCSrcV2F16() const { 511 return isVCSrcF16(); 512 } 513 514 bool isVSrcB32() const { 515 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 516 } 517 518 bool isVSrcB64() const { 519 return isVCSrcF64() || isLiteralImm(MVT::i64); 520 } 521 522 bool isVSrcB16() const { 523 return isVCSrcB16() || isLiteralImm(MVT::i16); 524 } 525 526 bool isVSrcV2B16() const { 527 return isVSrcB16() || isLiteralImm(MVT::v2i16); 528 } 529 530 bool isVCSrcV2FP32() const { 531 return isVCSrcF64(); 532 } 533 534 bool isVSrcV2FP32() const { 535 return isVSrcF64() || isLiteralImm(MVT::v2f32); 536 } 537 538 bool isVCSrcV2INT32() const { 539 return isVCSrcB64(); 540 } 541 542 bool isVSrcV2INT32() const { 543 return isVSrcB64() || isLiteralImm(MVT::v2i32); 544 } 545 546 bool isVSrcF32() const { 547 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 548 } 549 550 bool isVSrcF64() const { 551 return isVCSrcF64() || isLiteralImm(MVT::f64); 552 } 553 554 bool isVSrcF16() const { 555 return isVCSrcF16() || isLiteralImm(MVT::f16); 556 } 557 558 bool isVSrcV2F16() const { 559 return isVSrcF16() || isLiteralImm(MVT::v2f16); 560 } 561 562 bool isVISrcB32() const { 563 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 564 } 565 566 bool isVISrcB16() const { 567 return 
isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 568 } 569 570 bool isVISrcV2B16() const { 571 return isVISrcB16(); 572 } 573 574 bool isVISrcF32() const { 575 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 576 } 577 578 bool isVISrcF16() const { 579 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 580 } 581 582 bool isVISrcV2F16() const { 583 return isVISrcF16() || isVISrcB32(); 584 } 585 586 bool isVISrc_64B64() const { 587 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64); 588 } 589 590 bool isVISrc_64F64() const { 591 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64); 592 } 593 594 bool isVISrc_64V2FP32() const { 595 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32); 596 } 597 598 bool isVISrc_64V2INT32() const { 599 return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32); 600 } 601 602 bool isVISrc_256B64() const { 603 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64); 604 } 605 606 bool isVISrc_256F64() const { 607 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64); 608 } 609 610 bool isVISrc_128B16() const { 611 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16); 612 } 613 614 bool isVISrc_128V2B16() const { 615 return isVISrc_128B16(); 616 } 617 618 bool isVISrc_128B32() const { 619 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32); 620 } 621 622 bool isVISrc_128F32() const { 623 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32); 624 } 625 626 bool isVISrc_256V2FP32() const { 627 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32); 628 } 629 630 bool isVISrc_256V2INT32() const { 631 return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32); 632 } 633 634 bool isVISrc_512B32() const { 635 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32); 636 } 637 638 bool isVISrc_512B16() const { 639 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16); 640 } 641 642 bool isVISrc_512V2B16() const { 643 return isVISrc_512B16(); 644 } 645 646 bool isVISrc_512F32() const { 647 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32); 648 } 649 650 bool isVISrc_512F16() const { 651 return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16); 652 } 653 654 bool isVISrc_512V2F16() const { 655 return isVISrc_512F16() || isVISrc_512B32(); 656 } 657 658 bool isVISrc_1024B32() const { 659 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32); 660 } 661 662 bool isVISrc_1024B16() const { 663 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16); 664 } 665 666 bool isVISrc_1024V2B16() const { 667 return isVISrc_1024B16(); 668 } 669 670 bool isVISrc_1024F32() const { 671 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32); 672 } 673 674 bool isVISrc_1024F16() const { 675 return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16); 676 } 677 678 bool isVISrc_1024V2F16() const { 679 return isVISrc_1024F16() || isVISrc_1024B32(); 680 } 681 682 bool isAISrcB32() const { 683 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 684 } 685 686 bool isAISrcB16() const { 687 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 688 } 689 690 bool isAISrcV2B16() const { 691 return isAISrcB16(); 692 } 693 694 bool isAISrcF32() const { 695 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 696 } 697 698 bool isAISrcF16() const { 699 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 700 } 701 702 bool 
isAISrcV2F16() const { 703 return isAISrcF16() || isAISrcB32(); 704 } 705 706 bool isAISrc_64B64() const { 707 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64); 708 } 709 710 bool isAISrc_64F64() const { 711 return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64); 712 } 713 714 bool isAISrc_128B32() const { 715 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 716 } 717 718 bool isAISrc_128B16() const { 719 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 720 } 721 722 bool isAISrc_128V2B16() const { 723 return isAISrc_128B16(); 724 } 725 726 bool isAISrc_128F32() const { 727 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 728 } 729 730 bool isAISrc_128F16() const { 731 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 732 } 733 734 bool isAISrc_128V2F16() const { 735 return isAISrc_128F16() || isAISrc_128B32(); 736 } 737 738 bool isVISrc_128F16() const { 739 return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16); 740 } 741 742 bool isVISrc_128V2F16() const { 743 return isVISrc_128F16() || isVISrc_128B32(); 744 } 745 746 bool isAISrc_256B64() const { 747 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64); 748 } 749 750 bool isAISrc_256F64() const { 751 return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64); 752 } 753 754 bool isAISrc_512B32() const { 755 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 756 } 757 758 bool isAISrc_512B16() const { 759 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 760 } 761 762 bool isAISrc_512V2B16() const { 763 return isAISrc_512B16(); 764 } 765 766 bool isAISrc_512F32() const { 767 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 768 } 769 770 bool isAISrc_512F16() const { 771 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 772 } 773 774 bool isAISrc_512V2F16() const { 775 return isAISrc_512F16() || isAISrc_512B32(); 776 } 777 778 bool isAISrc_1024B32() const { 779 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 780 } 781 782 bool isAISrc_1024B16() const { 783 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 784 } 785 786 bool isAISrc_1024V2B16() const { 787 return isAISrc_1024B16(); 788 } 789 790 bool isAISrc_1024F32() const { 791 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 792 } 793 794 bool isAISrc_1024F16() const { 795 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 796 } 797 798 bool isAISrc_1024V2F16() const { 799 return isAISrc_1024F16() || isAISrc_1024B32(); 800 } 801 802 bool isKImmFP32() const { 803 return isLiteralImm(MVT::f32); 804 } 805 806 bool isKImmFP16() const { 807 return isLiteralImm(MVT::f16); 808 } 809 810 bool isMem() const override { 811 return false; 812 } 813 814 bool isExpr() const { 815 return Kind == Expression; 816 } 817 818 bool isSoppBrTarget() const { 819 return isExpr() || isImm(); 820 } 821 822 bool isSWaitCnt() const; 823 bool isHwreg() const; 824 bool isSendMsg() const; 825 bool isSwizzle() const; 826 bool isSMRDOffset8() const; 827 bool isSMEMOffset() const; 828 bool isSMRDLiteralOffset() const; 829 bool isDPP8() const; 830 bool isDPPCtrl() const; 831 bool isBLGP() const; 832 bool isCBSZ() const; 833 bool isABID() const; 834 bool isGPRIdxMode() const; 835 bool isS16Imm() const; 836 bool isU16Imm() const; 837 bool isEndpgm() const; 838 839 StringRef getExpressionAsToken() const { 840 assert(isExpr()); 841 const MCSymbolRefExpr *S = 
cast<MCSymbolRefExpr>(Expr); 842 return S->getSymbol().getName(); 843 } 844 845 StringRef getToken() const { 846 assert(isToken()); 847 848 if (Kind == Expression) 849 return getExpressionAsToken(); 850 851 return StringRef(Tok.Data, Tok.Length); 852 } 853 854 int64_t getImm() const { 855 assert(isImm()); 856 return Imm.Val; 857 } 858 859 void setImm(int64_t Val) { 860 assert(isImm()); 861 Imm.Val = Val; 862 } 863 864 ImmTy getImmTy() const { 865 assert(isImm()); 866 return Imm.Type; 867 } 868 869 unsigned getReg() const override { 870 assert(isRegKind()); 871 return Reg.RegNo; 872 } 873 874 SMLoc getStartLoc() const override { 875 return StartLoc; 876 } 877 878 SMLoc getEndLoc() const override { 879 return EndLoc; 880 } 881 882 SMRange getLocRange() const { 883 return SMRange(StartLoc, EndLoc); 884 } 885 886 Modifiers getModifiers() const { 887 assert(isRegKind() || isImmTy(ImmTyNone)); 888 return isRegKind() ? Reg.Mods : Imm.Mods; 889 } 890 891 void setModifiers(Modifiers Mods) { 892 assert(isRegKind() || isImmTy(ImmTyNone)); 893 if (isRegKind()) 894 Reg.Mods = Mods; 895 else 896 Imm.Mods = Mods; 897 } 898 899 bool hasModifiers() const { 900 return getModifiers().hasModifiers(); 901 } 902 903 bool hasFPModifiers() const { 904 return getModifiers().hasFPModifiers(); 905 } 906 907 bool hasIntModifiers() const { 908 return getModifiers().hasIntModifiers(); 909 } 910 911 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 912 913 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 914 915 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 916 917 template <unsigned Bitwidth> 918 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 919 920 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 921 addKImmFPOperands<16>(Inst, N); 922 } 923 924 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 925 addKImmFPOperands<32>(Inst, N); 926 } 927 928 void addRegOperands(MCInst &Inst, unsigned N) const; 929 930 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 931 addRegOperands(Inst, N); 932 } 933 934 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 935 if (isRegKind()) 936 addRegOperands(Inst, N); 937 else if (isExpr()) 938 Inst.addOperand(MCOperand::createExpr(Expr)); 939 else 940 addImmOperands(Inst, N); 941 } 942 943 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 944 Modifiers Mods = getModifiers(); 945 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 946 if (isRegKind()) { 947 addRegOperands(Inst, N); 948 } else { 949 addImmOperands(Inst, N, false); 950 } 951 } 952 953 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 954 assert(!hasIntModifiers()); 955 addRegOrImmWithInputModsOperands(Inst, N); 956 } 957 958 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 959 assert(!hasFPModifiers()); 960 addRegOrImmWithInputModsOperands(Inst, N); 961 } 962 963 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 964 Modifiers Mods = getModifiers(); 965 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 966 assert(isRegKind()); 967 addRegOperands(Inst, N); 968 } 969 970 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 971 assert(!hasIntModifiers()); 972 addRegWithInputModsOperands(Inst, N); 973 } 974 975 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 976 assert(!hasFPModifiers()); 977 addRegWithInputModsOperands(Inst, N); 978 } 979 980 void 
addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 981 if (isImm()) 982 addImmOperands(Inst, N); 983 else { 984 assert(isExpr()); 985 Inst.addOperand(MCOperand::createExpr(Expr)); 986 } 987 } 988 989 static void printImmTy(raw_ostream& OS, ImmTy Type) { 990 switch (Type) { 991 case ImmTyNone: OS << "None"; break; 992 case ImmTyGDS: OS << "GDS"; break; 993 case ImmTyLDS: OS << "LDS"; break; 994 case ImmTyOffen: OS << "Offen"; break; 995 case ImmTyIdxen: OS << "Idxen"; break; 996 case ImmTyAddr64: OS << "Addr64"; break; 997 case ImmTyOffset: OS << "Offset"; break; 998 case ImmTyInstOffset: OS << "InstOffset"; break; 999 case ImmTyOffset0: OS << "Offset0"; break; 1000 case ImmTyOffset1: OS << "Offset1"; break; 1001 case ImmTyDLC: OS << "DLC"; break; 1002 case ImmTySCCB: OS << "SCCB"; break; 1003 case ImmTyGLC: OS << "GLC"; break; 1004 case ImmTySLC: OS << "SLC"; break; 1005 case ImmTySWZ: OS << "SWZ"; break; 1006 case ImmTyTFE: OS << "TFE"; break; 1007 case ImmTyD16: OS << "D16"; break; 1008 case ImmTyFORMAT: OS << "FORMAT"; break; 1009 case ImmTyClampSI: OS << "ClampSI"; break; 1010 case ImmTyOModSI: OS << "OModSI"; break; 1011 case ImmTyDPP8: OS << "DPP8"; break; 1012 case ImmTyDppCtrl: OS << "DppCtrl"; break; 1013 case ImmTyDppRowMask: OS << "DppRowMask"; break; 1014 case ImmTyDppBankMask: OS << "DppBankMask"; break; 1015 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 1016 case ImmTyDppFi: OS << "FI"; break; 1017 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 1018 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 1019 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 1020 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 1021 case ImmTyDMask: OS << "DMask"; break; 1022 case ImmTyDim: OS << "Dim"; break; 1023 case ImmTyUNorm: OS << "UNorm"; break; 1024 case ImmTyDA: OS << "DA"; break; 1025 case ImmTyR128A16: OS << "R128A16"; break; 1026 case ImmTyA16: OS << "A16"; break; 1027 case ImmTyLWE: OS << "LWE"; break; 1028 case ImmTyOff: OS << "Off"; break; 1029 case ImmTyExpTgt: OS << "ExpTgt"; break; 1030 case ImmTyExpCompr: OS << "ExpCompr"; break; 1031 case ImmTyExpVM: OS << "ExpVM"; break; 1032 case ImmTyHwreg: OS << "Hwreg"; break; 1033 case ImmTySendMsg: OS << "SendMsg"; break; 1034 case ImmTyInterpSlot: OS << "InterpSlot"; break; 1035 case ImmTyInterpAttr: OS << "InterpAttr"; break; 1036 case ImmTyAttrChan: OS << "AttrChan"; break; 1037 case ImmTyOpSel: OS << "OpSel"; break; 1038 case ImmTyOpSelHi: OS << "OpSelHi"; break; 1039 case ImmTyNegLo: OS << "NegLo"; break; 1040 case ImmTyNegHi: OS << "NegHi"; break; 1041 case ImmTySwizzle: OS << "Swizzle"; break; 1042 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 1043 case ImmTyHigh: OS << "High"; break; 1044 case ImmTyBLGP: OS << "BLGP"; break; 1045 case ImmTyCBSZ: OS << "CBSZ"; break; 1046 case ImmTyABID: OS << "ABID"; break; 1047 case ImmTyEndpgm: OS << "Endpgm"; break; 1048 } 1049 } 1050 1051 void print(raw_ostream &OS) const override { 1052 switch (Kind) { 1053 case Register: 1054 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 1055 break; 1056 case Immediate: 1057 OS << '<' << getImm(); 1058 if (getImmTy() != ImmTyNone) { 1059 OS << " type: "; printImmTy(OS, getImmTy()); 1060 } 1061 OS << " mods: " << Imm.Mods << '>'; 1062 break; 1063 case Token: 1064 OS << '\'' << getToken() << '\''; 1065 break; 1066 case Expression: 1067 OS << "<expr " << *Expr << '>'; 1068 break; 1069 } 1070 } 1071 1072 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 1073 int64_t Val, SMLoc Loc, 1074 ImmTy Type 
= ImmTyNone, 1075 bool IsFPImm = false) { 1076 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 1077 Op->Imm.Val = Val; 1078 Op->Imm.IsFPImm = IsFPImm; 1079 Op->Imm.Kind = ImmKindTyNone; 1080 Op->Imm.Type = Type; 1081 Op->Imm.Mods = Modifiers(); 1082 Op->StartLoc = Loc; 1083 Op->EndLoc = Loc; 1084 return Op; 1085 } 1086 1087 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 1088 StringRef Str, SMLoc Loc, 1089 bool HasExplicitEncodingSize = true) { 1090 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 1091 Res->Tok.Data = Str.data(); 1092 Res->Tok.Length = Str.size(); 1093 Res->StartLoc = Loc; 1094 Res->EndLoc = Loc; 1095 return Res; 1096 } 1097 1098 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 1099 unsigned RegNo, SMLoc S, 1100 SMLoc E) { 1101 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 1102 Op->Reg.RegNo = RegNo; 1103 Op->Reg.Mods = Modifiers(); 1104 Op->StartLoc = S; 1105 Op->EndLoc = E; 1106 return Op; 1107 } 1108 1109 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 1110 const class MCExpr *Expr, SMLoc S) { 1111 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 1112 Op->Expr = Expr; 1113 Op->StartLoc = S; 1114 Op->EndLoc = S; 1115 return Op; 1116 } 1117 }; 1118 1119 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 1120 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 1121 return OS; 1122 } 1123 1124 //===----------------------------------------------------------------------===// 1125 // AsmParser 1126 //===----------------------------------------------------------------------===// 1127 1128 // Holds info related to the current kernel, e.g. count of SGPRs used. 1129 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 1130 // .amdgpu_hsa_kernel or at EOF. 1131 class KernelScopeInfo { 1132 int SgprIndexUnusedMin = -1; 1133 int VgprIndexUnusedMin = -1; 1134 MCContext *Ctx = nullptr; 1135 1136 void usesSgprAt(int i) { 1137 if (i >= SgprIndexUnusedMin) { 1138 SgprIndexUnusedMin = ++i; 1139 if (Ctx) { 1140 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1141 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1142 } 1143 } 1144 } 1145 1146 void usesVgprAt(int i) { 1147 if (i >= VgprIndexUnusedMin) { 1148 VgprIndexUnusedMin = ++i; 1149 if (Ctx) { 1150 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1151 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1152 } 1153 } 1154 } 1155 1156 public: 1157 KernelScopeInfo() = default; 1158 1159 void initialize(MCContext &Context) { 1160 Ctx = &Context; 1161 usesSgprAt(SgprIndexUnusedMin = -1); 1162 usesVgprAt(VgprIndexUnusedMin = -1); 1163 } 1164 1165 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1166 switch (RegKind) { 1167 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1168 case IS_AGPR: // fall through 1169 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1170 default: break; 1171 } 1172 } 1173 }; 1174 1175 class AMDGPUAsmParser : public MCTargetAsmParser { 1176 MCAsmParser &Parser; 1177 1178 // Number of extra operands parsed after the first optional operand. 1179 // This may be necessary to skip hardcoded mandatory operands. 
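  // For example, an optional operand requested by the matcher may still be
  // preceded by operands that are hardcoded in the instruction syntax; in that
  // case the optional-operand parser may have to look at up to this many
  // following operands before it finds (or gives up on) the requested one.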
1180 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1181 1182 unsigned ForcedEncodingSize = 0; 1183 bool ForcedDPP = false; 1184 bool ForcedSDWA = false; 1185 KernelScopeInfo KernelScope; 1186 1187 /// @name Auto-generated Match Functions 1188 /// { 1189 1190 #define GET_ASSEMBLER_HEADER 1191 #include "AMDGPUGenAsmMatcher.inc" 1192 1193 /// } 1194 1195 private: 1196 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1197 bool OutOfRangeError(SMRange Range); 1198 /// Calculate VGPR/SGPR blocks required for given target, reserved 1199 /// registers, and user-specified NextFreeXGPR values. 1200 /// 1201 /// \param Features [in] Target features, used for bug corrections. 1202 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1203 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1204 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1205 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1206 /// descriptor field, if valid. 1207 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1208 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1209 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1210 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1211 /// \param VGPRBlocks [out] Result VGPR block count. 1212 /// \param SGPRBlocks [out] Result SGPR block count. 1213 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1214 bool FlatScrUsed, bool XNACKUsed, 1215 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1216 SMRange VGPRRange, unsigned NextFreeSGPR, 1217 SMRange SGPRRange, unsigned &VGPRBlocks, 1218 unsigned &SGPRBlocks); 1219 bool ParseDirectiveAMDGCNTarget(); 1220 bool ParseDirectiveAMDHSAKernel(); 1221 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1222 bool ParseDirectiveHSACodeObjectVersion(); 1223 bool ParseDirectiveHSACodeObjectISA(); 1224 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1225 bool ParseDirectiveAMDKernelCodeT(); 1226 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1227 bool ParseDirectiveAMDGPUHsaKernel(); 1228 1229 bool ParseDirectiveISAVersion(); 1230 bool ParseDirectiveHSAMetadata(); 1231 bool ParseDirectivePALMetadataBegin(); 1232 bool ParseDirectivePALMetadata(); 1233 bool ParseDirectiveAMDGPULDS(); 1234 1235 /// Common code to parse out a block of text (typically YAML) between start and 1236 /// end directives. 
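  /// For example, the HSA metadata directives collect their YAML payload this
  /// way (directive spellings as used for code object metadata):
  ///   .amdgpu_metadata
  ///     <metadata YAML>
  ///   .end_amdgpu_metadata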
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           bool RestoreOnFailure = false);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
                           unsigned &RegWidth,
                           SmallVectorImpl<AsmToken> &Tokens);
  unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
                        unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth,
                         SMLoc Loc);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific
      // target.
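      // Illustrative use from an assembly source (not part of this file): the
      // variables defined below can be queried in conditional assembly, e.g.
      //   .if .option.machine_version_major >= 9
      //   ; GFX9-specific code
      //   .endif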
1303 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1304 MCContext &Ctx = getContext(); 1305 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1306 MCSymbol *Sym = 1307 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1308 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1309 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1310 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1311 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1312 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1313 } else { 1314 MCSymbol *Sym = 1315 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1316 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1317 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1318 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1319 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1320 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1321 } 1322 if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) { 1323 initializeGprCountSymbol(IS_VGPR); 1324 initializeGprCountSymbol(IS_SGPR); 1325 } else 1326 KernelScope.initialize(getContext()); 1327 } 1328 } 1329 1330 bool hasXNACK() const { 1331 return AMDGPU::hasXNACK(getSTI()); 1332 } 1333 1334 bool hasMIMG_R128() const { 1335 return AMDGPU::hasMIMG_R128(getSTI()); 1336 } 1337 1338 bool hasPackedD16() const { 1339 return AMDGPU::hasPackedD16(getSTI()); 1340 } 1341 1342 bool hasGFX10A16() const { 1343 return AMDGPU::hasGFX10A16(getSTI()); 1344 } 1345 1346 bool isSI() const { 1347 return AMDGPU::isSI(getSTI()); 1348 } 1349 1350 bool isCI() const { 1351 return AMDGPU::isCI(getSTI()); 1352 } 1353 1354 bool isVI() const { 1355 return AMDGPU::isVI(getSTI()); 1356 } 1357 1358 bool isGFX9() const { 1359 return AMDGPU::isGFX9(getSTI()); 1360 } 1361 1362 bool isGFX90A() const { 1363 return AMDGPU::isGFX90A(getSTI()); 1364 } 1365 1366 bool isGFX9Plus() const { 1367 return AMDGPU::isGFX9Plus(getSTI()); 1368 } 1369 1370 bool isGFX10() const { 1371 return AMDGPU::isGFX10(getSTI()); 1372 } 1373 1374 bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); } 1375 1376 bool isGFX10_BEncoding() const { 1377 return AMDGPU::isGFX10_BEncoding(getSTI()); 1378 } 1379 1380 bool hasInv2PiInlineImm() const { 1381 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1382 } 1383 1384 bool hasFlatOffsets() const { 1385 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1386 } 1387 1388 bool hasSGPR102_SGPR103() const { 1389 return !isVI() && !isGFX9(); 1390 } 1391 1392 bool hasSGPR104_SGPR105() const { return isGFX10Plus(); } 1393 1394 bool hasIntClamp() const { 1395 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1396 } 1397 1398 AMDGPUTargetStreamer &getTargetStreamer() { 1399 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1400 return static_cast<AMDGPUTargetStreamer &>(TS); 1401 } 1402 1403 const MCRegisterInfo *getMRI() const { 1404 // We need this const_cast because for some reason getContext() is not const 1405 // in MCAsmParser. 
1406 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1407 } 1408 1409 const MCInstrInfo *getMII() const { 1410 return &MII; 1411 } 1412 1413 const FeatureBitset &getFeatureBits() const { 1414 return getSTI().getFeatureBits(); 1415 } 1416 1417 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1418 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1419 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1420 1421 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1422 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1423 bool isForcedDPP() const { return ForcedDPP; } 1424 bool isForcedSDWA() const { return ForcedSDWA; } 1425 ArrayRef<unsigned> getMatchedVariants() const; 1426 StringRef getMatchedVariantName() const; 1427 1428 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1429 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1430 bool RestoreOnFailure); 1431 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1432 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1433 SMLoc &EndLoc) override; 1434 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1435 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1436 unsigned Kind) override; 1437 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1438 OperandVector &Operands, MCStreamer &Out, 1439 uint64_t &ErrorInfo, 1440 bool MatchingInlineAsm) override; 1441 bool ParseDirective(AsmToken DirectiveID) override; 1442 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1443 OperandMode Mode = OperandMode_Default); 1444 StringRef parseMnemonicSuffix(StringRef Name); 1445 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1446 SMLoc NameLoc, OperandVector &Operands) override; 1447 //bool ProcessInstruction(MCInst &Inst); 1448 1449 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1450 1451 OperandMatchResultTy 1452 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1453 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1454 bool (*ConvertResult)(int64_t &) = nullptr); 1455 1456 OperandMatchResultTy 1457 parseOperandArrayWithPrefix(const char *Prefix, 1458 OperandVector &Operands, 1459 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1460 bool (*ConvertResult)(int64_t&) = nullptr); 1461 1462 OperandMatchResultTy 1463 parseNamedBit(StringRef Name, OperandVector &Operands, 1464 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1465 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1466 StringRef &Value, 1467 SMLoc &StringLoc); 1468 1469 bool isModifier(); 1470 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1471 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1472 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1473 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1474 bool parseSP3NegModifier(); 1475 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1476 OperandMatchResultTy parseReg(OperandVector &Operands); 1477 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1478 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1479 OperandMatchResultTy 
parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1480 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1481 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1482 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1483 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1484 OperandMatchResultTy parseUfmt(int64_t &Format); 1485 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1486 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1487 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1488 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1489 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1490 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1491 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1492 1493 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1494 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1495 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1496 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1497 1498 bool parseCnt(int64_t &IntVal); 1499 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1500 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1501 1502 private: 1503 struct OperandInfoTy { 1504 SMLoc Loc; 1505 int64_t Id; 1506 bool IsSymbolic = false; 1507 bool IsDefined = false; 1508 1509 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1510 }; 1511 1512 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1513 bool validateSendMsg(const OperandInfoTy &Msg, 1514 const OperandInfoTy &Op, 1515 const OperandInfoTy &Stream); 1516 1517 bool parseHwregBody(OperandInfoTy &HwReg, 1518 OperandInfoTy &Offset, 1519 OperandInfoTy &Width); 1520 bool validateHwreg(const OperandInfoTy &HwReg, 1521 const OperandInfoTy &Offset, 1522 const OperandInfoTy &Width); 1523 1524 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1525 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1526 1527 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1528 const OperandVector &Operands) const; 1529 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1530 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1531 SMLoc getLitLoc(const OperandVector &Operands) const; 1532 SMLoc getConstLoc(const OperandVector &Operands) const; 1533 1534 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1535 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1536 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1537 bool validateSOPLiteral(const MCInst &Inst) const; 1538 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1539 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1540 bool validateIntClampSupported(const MCInst &Inst); 1541 bool validateMIMGAtomicDMask(const MCInst &Inst); 1542 bool validateMIMGGatherDMask(const MCInst &Inst); 1543 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1544 bool validateMIMGDataSize(const MCInst &Inst); 1545 bool validateMIMGAddrSize(const MCInst &Inst); 1546 bool validateMIMGD16(const 
MCInst &Inst); 1547 bool validateMIMGDim(const MCInst &Inst); 1548 bool validateMIMGMSAA(const MCInst &Inst); 1549 bool validateLdsDirect(const MCInst &Inst); 1550 bool validateOpSel(const MCInst &Inst); 1551 bool validateVccOperand(unsigned Reg) const; 1552 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1553 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1554 bool validateAGPRLdSt(const MCInst &Inst) const; 1555 bool validateVGPRAlign(const MCInst &Inst) const; 1556 bool validateDivScale(const MCInst &Inst); 1557 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1558 const SMLoc &IDLoc); 1559 unsigned getConstantBusLimit(unsigned Opcode) const; 1560 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1561 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1562 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1563 1564 bool isSupportedMnemo(StringRef Mnemo, 1565 const FeatureBitset &FBS); 1566 bool isSupportedMnemo(StringRef Mnemo, 1567 const FeatureBitset &FBS, 1568 ArrayRef<unsigned> Variants); 1569 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1570 1571 bool isId(const StringRef Id) const; 1572 bool isId(const AsmToken &Token, const StringRef Id) const; 1573 bool isToken(const AsmToken::TokenKind Kind) const; 1574 bool trySkipId(const StringRef Id); 1575 bool trySkipId(const StringRef Pref, const StringRef Id); 1576 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1577 bool trySkipToken(const AsmToken::TokenKind Kind); 1578 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1579 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1580 bool parseId(StringRef &Val, const StringRef ErrMsg = ""); 1581 1582 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1583 AsmToken::TokenKind getTokenKind() const; 1584 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1585 bool parseExpr(OperandVector &Operands); 1586 StringRef getTokenStr() const; 1587 AsmToken peekToken(); 1588 AsmToken getToken() const; 1589 SMLoc getLoc() const; 1590 void lex(); 1591 1592 public: 1593 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1594 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1595 1596 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1597 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1598 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1599 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1600 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1601 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1602 1603 bool parseSwizzleOperand(int64_t &Op, 1604 const unsigned MinVal, 1605 const unsigned MaxVal, 1606 const StringRef ErrMsg, 1607 SMLoc &Loc); 1608 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1609 const unsigned MinVal, 1610 const unsigned MaxVal, 1611 const StringRef ErrMsg); 1612 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1613 bool parseSwizzleOffset(int64_t &Imm); 1614 bool parseSwizzleMacro(int64_t &Imm); 1615 bool parseSwizzleQuadPerm(int64_t &Imm); 1616 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1617 bool parseSwizzleBroadcast(int64_t &Imm); 1618 bool parseSwizzleSwap(int64_t &Imm); 1619 bool parseSwizzleReverse(int64_t &Imm); 1620 1621 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1622 int64_t parseGPRIdxMacro(); 1623 
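  // As a reference for the two macro parsers declared above, the accepted
  // source forms look like (abbreviated examples of the AMDGPU assembler
  // operand syntax):
  //   ds_swizzle_b32   v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
  //   s_set_gpr_idx_on s2, gpr_idx(SRC0, DST)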
1624 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1625 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1626 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1627 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1628 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1629 1630 AMDGPUOperand::Ptr defaultDLC() const; 1631 AMDGPUOperand::Ptr defaultSCCB() const; 1632 AMDGPUOperand::Ptr defaultGLC() const; 1633 AMDGPUOperand::Ptr defaultGLC_1() const; 1634 AMDGPUOperand::Ptr defaultSLC() const; 1635 1636 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1637 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1638 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1639 AMDGPUOperand::Ptr defaultFlatOffset() const; 1640 1641 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1642 1643 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1644 OptionalImmIndexMap &OptionalIdx); 1645 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1646 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1647 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1648 1649 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1650 1651 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1652 bool IsAtomic = false); 1653 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1654 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1655 1656 bool parseDimId(unsigned &Encoding); 1657 OperandMatchResultTy parseDim(OperandVector &Operands); 1658 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1659 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1660 bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands); 1661 int64_t parseDPPCtrlSel(StringRef Ctrl); 1662 int64_t parseDPPCtrlPerm(); 1663 AMDGPUOperand::Ptr defaultRowMask() const; 1664 AMDGPUOperand::Ptr defaultBankMask() const; 1665 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1666 AMDGPUOperand::Ptr defaultFI() const; 1667 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1668 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1669 1670 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1671 AMDGPUOperand::ImmTy Type); 1672 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1673 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1674 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1675 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1676 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1677 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1678 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1679 uint64_t BasicInstType, 1680 bool SkipDstVcc = false, 1681 bool SkipSrcVcc = false); 1682 1683 AMDGPUOperand::Ptr defaultBLGP() const; 1684 AMDGPUOperand::Ptr defaultCBSZ() const; 1685 AMDGPUOperand::Ptr defaultABID() const; 1686 1687 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1688 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1689 }; 1690 1691 struct OptionalOperand { 1692 const char *Name; 1693 AMDGPUOperand::ImmTy Type; 1694 bool IsBit; 1695 bool (*ConvertResult)(int64_t&); 1696 }; 
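// A minimal sketch of how OptionalOperand records are typically consumed (the
// names and table entries here are illustrative only, not the actual table
// defined elsewhere in this file): the optional-operand parser walks a table
// of such records, matches Name against the next token, parses either a named
// bit or an integer with the given prefix, and, when ConvertResult is
// non-null, lets it re-encode the parsed value.
//
//   static bool ConvertExampleOmod(int64_t &Val) {  // hypothetical helper
//     // map 1/2/4 onto the 2-bit OMOD encoding, reject anything else
//     ...
//   }
//
//   static const OptionalOperand ExampleOperands[] = {
//     {"gds", AMDGPUOperand::ImmTyGDS,    true,  nullptr},
//     {"mul", AMDGPUOperand::ImmTyOModSI, false, ConvertExampleOmod},
//   };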

} // end anonymous namespace

// May also be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand's floating-point format.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP immediate values are broken.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
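  // (For example, operands spelled src_shared_base, src_shared_limit,
  // src_private_base or src_private_limit in the assembly source are such
  // named inline values.)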
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }

  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of such a literal are set to zero when it is encoded
    // (see addLiteralImmOperand), but these literals are still accepted.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 :
                     (type == MVT::v2f32) ?
MVT::f32 : type; 1897 1898 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1899 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1900 } 1901 1902 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1903 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1904 } 1905 1906 bool AMDGPUOperand::isVRegWithInputMods() const { 1907 return isRegClass(AMDGPU::VGPR_32RegClassID) || 1908 // GFX90A allows DPP on 64-bit operands. 1909 (isRegClass(AMDGPU::VReg_64RegClassID) && 1910 AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]); 1911 } 1912 1913 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1914 if (AsmParser->isVI()) 1915 return isVReg32(); 1916 else if (AsmParser->isGFX9Plus()) 1917 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1918 else 1919 return false; 1920 } 1921 1922 bool AMDGPUOperand::isSDWAFP16Operand() const { 1923 return isSDWAOperand(MVT::f16); 1924 } 1925 1926 bool AMDGPUOperand::isSDWAFP32Operand() const { 1927 return isSDWAOperand(MVT::f32); 1928 } 1929 1930 bool AMDGPUOperand::isSDWAInt16Operand() const { 1931 return isSDWAOperand(MVT::i16); 1932 } 1933 1934 bool AMDGPUOperand::isSDWAInt32Operand() const { 1935 return isSDWAOperand(MVT::i32); 1936 } 1937 1938 bool AMDGPUOperand::isBoolReg() const { 1939 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1940 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1941 } 1942 1943 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1944 { 1945 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1946 assert(Size == 2 || Size == 4 || Size == 8); 1947 1948 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1949 1950 if (Imm.Mods.Abs) { 1951 Val &= ~FpSignMask; 1952 } 1953 if (Imm.Mods.Neg) { 1954 Val ^= FpSignMask; 1955 } 1956 1957 return Val; 1958 } 1959 1960 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1961 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1962 Inst.getNumOperands())) { 1963 addLiteralImmOperand(Inst, Imm.Val, 1964 ApplyModifiers & 1965 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1966 } else { 1967 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1968 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1969 setImmKindNone(); 1970 } 1971 } 1972 1973 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1974 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1975 auto OpNum = Inst.getNumOperands(); 1976 // Check that this operand accepts literals 1977 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1978 1979 if (ApplyModifiers) { 1980 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1981 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1982 Val = applyInputFPModifiers(Val, Size); 1983 } 1984 1985 APInt Literal(64, Val); 1986 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1987 1988 if (Imm.IsFPImm) { // We got fp literal token 1989 switch (OpTy) { 1990 case AMDGPU::OPERAND_REG_IMM_INT64: 1991 case AMDGPU::OPERAND_REG_IMM_FP64: 1992 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1993 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1994 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 1995 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1996 AsmParser->hasInv2PiInlineImm())) { 1997 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1998 setImmKindConst(); 1999 return; 2000 } 2001 2002 // Non-inlineable 2003 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 2004 // For fp operands we check if low 32 bits are zeros 2005 if (Literal.getLoBits(32) != 0) { 2006 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 2007 "Can't encode literal as exact 64-bit floating-point operand. " 2008 "Low 32-bits will be set to zero"); 2009 } 2010 2011 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 2012 setImmKindLiteral(); 2013 return; 2014 } 2015 2016 // We don't allow fp literals in 64-bit integer instructions. It is 2017 // unclear how we should encode them. This case should be checked earlier 2018 // in predicate methods (isLiteralImm()) 2019 llvm_unreachable("fp literal in 64-bit integer instruction."); 2020 2021 case AMDGPU::OPERAND_REG_IMM_INT32: 2022 case AMDGPU::OPERAND_REG_IMM_FP32: 2023 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2024 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2025 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2026 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2027 case AMDGPU::OPERAND_REG_IMM_INT16: 2028 case AMDGPU::OPERAND_REG_IMM_FP16: 2029 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2030 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2031 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2032 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2033 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2034 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2035 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2036 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 2037 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2038 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2039 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2040 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2041 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2042 case AMDGPU::OPERAND_REG_IMM_V2INT32: { 2043 bool lost; 2044 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2045 // Convert literal to single precision 2046 FPLiteral.convert(*getOpFltSemantics(OpTy), 2047 APFloat::rmNearestTiesToEven, &lost); 2048 // We allow precision lost but not overflow or underflow. This should be 2049 // checked earlier in isLiteralImm() 2050 2051 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 2052 Inst.addOperand(MCOperand::createImm(ImmVal)); 2053 setImmKindLiteral(); 2054 return; 2055 } 2056 default: 2057 llvm_unreachable("invalid operand size"); 2058 } 2059 2060 return; 2061 } 2062 2063 // We got int literal token. 2064 // Only sign extend inline immediates. 
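  // For example, "-2" used with a 32-bit operand is emitted as the inline
  // constant -2 (kept sign-extended), while "-200" is outside the inline
  // range and is emitted as the 32-bit literal 0xFFFFFF38.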
2065 switch (OpTy) { 2066 case AMDGPU::OPERAND_REG_IMM_INT32: 2067 case AMDGPU::OPERAND_REG_IMM_FP32: 2068 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 2069 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 2070 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 2071 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 2072 case AMDGPU::OPERAND_REG_IMM_V2INT16: 2073 case AMDGPU::OPERAND_REG_IMM_V2FP16: 2074 case AMDGPU::OPERAND_REG_IMM_V2FP32: 2075 case AMDGPU::OPERAND_REG_INLINE_C_V2FP32: 2076 case AMDGPU::OPERAND_REG_IMM_V2INT32: 2077 case AMDGPU::OPERAND_REG_INLINE_C_V2INT32: 2078 if (isSafeTruncation(Val, 32) && 2079 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 2080 AsmParser->hasInv2PiInlineImm())) { 2081 Inst.addOperand(MCOperand::createImm(Val)); 2082 setImmKindConst(); 2083 return; 2084 } 2085 2086 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 2087 setImmKindLiteral(); 2088 return; 2089 2090 case AMDGPU::OPERAND_REG_IMM_INT64: 2091 case AMDGPU::OPERAND_REG_IMM_FP64: 2092 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 2093 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 2094 case AMDGPU::OPERAND_REG_INLINE_AC_FP64: 2095 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 2096 Inst.addOperand(MCOperand::createImm(Val)); 2097 setImmKindConst(); 2098 return; 2099 } 2100 2101 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 2102 setImmKindLiteral(); 2103 return; 2104 2105 case AMDGPU::OPERAND_REG_IMM_INT16: 2106 case AMDGPU::OPERAND_REG_IMM_FP16: 2107 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 2108 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 2109 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 2110 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 2111 if (isSafeTruncation(Val, 16) && 2112 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2113 AsmParser->hasInv2PiInlineImm())) { 2114 Inst.addOperand(MCOperand::createImm(Val)); 2115 setImmKindConst(); 2116 return; 2117 } 2118 2119 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 2120 setImmKindLiteral(); 2121 return; 2122 2123 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 2124 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 2125 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 2126 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 2127 assert(isSafeTruncation(Val, 16)); 2128 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 2129 AsmParser->hasInv2PiInlineImm())); 2130 2131 Inst.addOperand(MCOperand::createImm(Val)); 2132 return; 2133 } 2134 default: 2135 llvm_unreachable("invalid operand size"); 2136 } 2137 } 2138 2139 template <unsigned Bitwidth> 2140 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 2141 APInt Literal(64, Imm.Val); 2142 setImmKindNone(); 2143 2144 if (!Imm.IsFPImm) { 2145 // We got int literal token. 
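    // For instance, an integer token used for a KImm operand (such as the
    // trailing 32-bit constant of v_madmk_f32) is simply truncated to
    // Bitwidth bits; no inline-constant encoding is attempted.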
2146 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 2147 return; 2148 } 2149 2150 bool Lost; 2151 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 2152 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 2153 APFloat::rmNearestTiesToEven, &Lost); 2154 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 2155 } 2156 2157 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 2158 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 2159 } 2160 2161 static bool isInlineValue(unsigned Reg) { 2162 switch (Reg) { 2163 case AMDGPU::SRC_SHARED_BASE: 2164 case AMDGPU::SRC_SHARED_LIMIT: 2165 case AMDGPU::SRC_PRIVATE_BASE: 2166 case AMDGPU::SRC_PRIVATE_LIMIT: 2167 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 2168 return true; 2169 case AMDGPU::SRC_VCCZ: 2170 case AMDGPU::SRC_EXECZ: 2171 case AMDGPU::SRC_SCC: 2172 return true; 2173 case AMDGPU::SGPR_NULL: 2174 return true; 2175 default: 2176 return false; 2177 } 2178 } 2179 2180 bool AMDGPUOperand::isInlineValue() const { 2181 return isRegKind() && ::isInlineValue(getReg()); 2182 } 2183 2184 //===----------------------------------------------------------------------===// 2185 // AsmParser 2186 //===----------------------------------------------------------------------===// 2187 2188 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2189 if (Is == IS_VGPR) { 2190 switch (RegWidth) { 2191 default: return -1; 2192 case 1: return AMDGPU::VGPR_32RegClassID; 2193 case 2: return AMDGPU::VReg_64RegClassID; 2194 case 3: return AMDGPU::VReg_96RegClassID; 2195 case 4: return AMDGPU::VReg_128RegClassID; 2196 case 5: return AMDGPU::VReg_160RegClassID; 2197 case 6: return AMDGPU::VReg_192RegClassID; 2198 case 8: return AMDGPU::VReg_256RegClassID; 2199 case 16: return AMDGPU::VReg_512RegClassID; 2200 case 32: return AMDGPU::VReg_1024RegClassID; 2201 } 2202 } else if (Is == IS_TTMP) { 2203 switch (RegWidth) { 2204 default: return -1; 2205 case 1: return AMDGPU::TTMP_32RegClassID; 2206 case 2: return AMDGPU::TTMP_64RegClassID; 2207 case 4: return AMDGPU::TTMP_128RegClassID; 2208 case 8: return AMDGPU::TTMP_256RegClassID; 2209 case 16: return AMDGPU::TTMP_512RegClassID; 2210 } 2211 } else if (Is == IS_SGPR) { 2212 switch (RegWidth) { 2213 default: return -1; 2214 case 1: return AMDGPU::SGPR_32RegClassID; 2215 case 2: return AMDGPU::SGPR_64RegClassID; 2216 case 3: return AMDGPU::SGPR_96RegClassID; 2217 case 4: return AMDGPU::SGPR_128RegClassID; 2218 case 5: return AMDGPU::SGPR_160RegClassID; 2219 case 6: return AMDGPU::SGPR_192RegClassID; 2220 case 8: return AMDGPU::SGPR_256RegClassID; 2221 case 16: return AMDGPU::SGPR_512RegClassID; 2222 } 2223 } else if (Is == IS_AGPR) { 2224 switch (RegWidth) { 2225 default: return -1; 2226 case 1: return AMDGPU::AGPR_32RegClassID; 2227 case 2: return AMDGPU::AReg_64RegClassID; 2228 case 3: return AMDGPU::AReg_96RegClassID; 2229 case 4: return AMDGPU::AReg_128RegClassID; 2230 case 5: return AMDGPU::AReg_160RegClassID; 2231 case 6: return AMDGPU::AReg_192RegClassID; 2232 case 8: return AMDGPU::AReg_256RegClassID; 2233 case 16: return AMDGPU::AReg_512RegClassID; 2234 case 32: return AMDGPU::AReg_1024RegClassID; 2235 } 2236 } 2237 return -1; 2238 } 2239 2240 static unsigned getSpecialRegForName(StringRef RegName) { 2241 return StringSwitch<unsigned>(RegName) 2242 .Case("exec", AMDGPU::EXEC) 2243 .Case("vcc", AMDGPU::VCC) 2244 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2245 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2246 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2247 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2248 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2249 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2250 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2251 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2252 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2253 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2254 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2255 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2256 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2257 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2258 .Case("m0", AMDGPU::M0) 2259 .Case("vccz", AMDGPU::SRC_VCCZ) 2260 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2261 .Case("execz", AMDGPU::SRC_EXECZ) 2262 .Case("src_execz", AMDGPU::SRC_EXECZ) 2263 .Case("scc", AMDGPU::SRC_SCC) 2264 .Case("src_scc", AMDGPU::SRC_SCC) 2265 .Case("tba", AMDGPU::TBA) 2266 .Case("tma", AMDGPU::TMA) 2267 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2268 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2269 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2270 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2271 .Case("vcc_lo", AMDGPU::VCC_LO) 2272 .Case("vcc_hi", AMDGPU::VCC_HI) 2273 .Case("exec_lo", AMDGPU::EXEC_LO) 2274 .Case("exec_hi", AMDGPU::EXEC_HI) 2275 .Case("tma_lo", AMDGPU::TMA_LO) 2276 .Case("tma_hi", AMDGPU::TMA_HI) 2277 .Case("tba_lo", AMDGPU::TBA_LO) 2278 .Case("tba_hi", AMDGPU::TBA_HI) 2279 .Case("pc", AMDGPU::PC_REG) 2280 .Case("null", AMDGPU::SGPR_NULL) 2281 .Default(AMDGPU::NoRegister); 2282 } 2283 2284 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2285 SMLoc &EndLoc, bool RestoreOnFailure) { 2286 auto R = parseRegister(); 2287 if (!R) return true; 2288 assert(R->isReg()); 2289 RegNo = R->getReg(); 2290 StartLoc = R->getStartLoc(); 2291 EndLoc = R->getEndLoc(); 2292 return false; 2293 } 2294 2295 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2296 SMLoc &EndLoc) { 2297 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2298 } 2299 2300 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2301 SMLoc &StartLoc, 2302 SMLoc &EndLoc) { 2303 bool Result = 2304 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2305 bool PendingErrors = getParser().hasPendingError(); 2306 getParser().clearPendingErrors(); 2307 if (PendingErrors) 2308 return MatchOperand_ParseFail; 2309 if (Result) 2310 return MatchOperand_NoMatch; 2311 return MatchOperand_Success; 2312 } 2313 2314 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2315 RegisterKind RegKind, unsigned Reg1, 2316 SMLoc Loc) { 2317 switch (RegKind) { 2318 case IS_SPECIAL: 2319 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2320 Reg = AMDGPU::EXEC; 2321 RegWidth = 2; 2322 return true; 2323 } 2324 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2325 Reg = AMDGPU::FLAT_SCR; 2326 RegWidth = 2; 2327 return true; 2328 } 2329 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2330 Reg = AMDGPU::XNACK_MASK; 2331 RegWidth = 2; 2332 return true; 2333 } 2334 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2335 Reg = AMDGPU::VCC; 2336 RegWidth = 2; 2337 return true; 2338 } 2339 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2340 Reg = AMDGPU::TBA; 2341 RegWidth = 2; 2342 return true; 2343 } 2344 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2345 Reg = AMDGPU::TMA; 2346 
RegWidth = 2; 2347 return true; 2348 } 2349 Error(Loc, "register does not fit in the list"); 2350 return false; 2351 case IS_VGPR: 2352 case IS_SGPR: 2353 case IS_AGPR: 2354 case IS_TTMP: 2355 if (Reg1 != Reg + RegWidth) { 2356 Error(Loc, "registers in a list must have consecutive indices"); 2357 return false; 2358 } 2359 RegWidth++; 2360 return true; 2361 default: 2362 llvm_unreachable("unexpected register kind"); 2363 } 2364 } 2365 2366 struct RegInfo { 2367 StringLiteral Name; 2368 RegisterKind Kind; 2369 }; 2370 2371 static constexpr RegInfo RegularRegisters[] = { 2372 {{"v"}, IS_VGPR}, 2373 {{"s"}, IS_SGPR}, 2374 {{"ttmp"}, IS_TTMP}, 2375 {{"acc"}, IS_AGPR}, 2376 {{"a"}, IS_AGPR}, 2377 }; 2378 2379 static bool isRegularReg(RegisterKind Kind) { 2380 return Kind == IS_VGPR || 2381 Kind == IS_SGPR || 2382 Kind == IS_TTMP || 2383 Kind == IS_AGPR; 2384 } 2385 2386 static const RegInfo* getRegularRegInfo(StringRef Str) { 2387 for (const RegInfo &Reg : RegularRegisters) 2388 if (Str.startswith(Reg.Name)) 2389 return &Reg; 2390 return nullptr; 2391 } 2392 2393 static bool getRegNum(StringRef Str, unsigned& Num) { 2394 return !Str.getAsInteger(10, Num); 2395 } 2396 2397 bool 2398 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2399 const AsmToken &NextToken) const { 2400 2401 // A list of consecutive registers: [s0,s1,s2,s3] 2402 if (Token.is(AsmToken::LBrac)) 2403 return true; 2404 2405 if (!Token.is(AsmToken::Identifier)) 2406 return false; 2407 2408 // A single register like s0 or a range of registers like s[0:1] 2409 2410 StringRef Str = Token.getString(); 2411 const RegInfo *Reg = getRegularRegInfo(Str); 2412 if (Reg) { 2413 StringRef RegName = Reg->Name; 2414 StringRef RegSuffix = Str.substr(RegName.size()); 2415 if (!RegSuffix.empty()) { 2416 unsigned Num; 2417 // A single register with an index: rXX 2418 if (getRegNum(RegSuffix, Num)) 2419 return true; 2420 } else { 2421 // A range of registers: r[XX:YY]. 2422 if (NextToken.is(AsmToken::LBrac)) 2423 return true; 2424 } 2425 } 2426 2427 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2428 } 2429 2430 bool 2431 AMDGPUAsmParser::isRegister() 2432 { 2433 return isRegister(getToken(), peekToken()); 2434 } 2435 2436 unsigned 2437 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2438 unsigned RegNum, 2439 unsigned RegWidth, 2440 SMLoc Loc) { 2441 2442 assert(isRegularReg(RegKind)); 2443 2444 unsigned AlignSize = 1; 2445 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2446 // SGPR and TTMP registers must be aligned. 2447 // Max required alignment is 4 dwords. 
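    // For example, s[2:3] passes the check below (RegNum 2, AlignSize 2),
    // while s[1:2] is rejected with "invalid register alignment".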
2448 AlignSize = std::min(RegWidth, 4u); 2449 } 2450 2451 if (RegNum % AlignSize != 0) { 2452 Error(Loc, "invalid register alignment"); 2453 return AMDGPU::NoRegister; 2454 } 2455 2456 unsigned RegIdx = RegNum / AlignSize; 2457 int RCID = getRegClass(RegKind, RegWidth); 2458 if (RCID == -1) { 2459 Error(Loc, "invalid or unsupported register size"); 2460 return AMDGPU::NoRegister; 2461 } 2462 2463 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2464 const MCRegisterClass RC = TRI->getRegClass(RCID); 2465 if (RegIdx >= RC.getNumRegs()) { 2466 Error(Loc, "register index is out of range"); 2467 return AMDGPU::NoRegister; 2468 } 2469 2470 return RC.getRegister(RegIdx); 2471 } 2472 2473 bool 2474 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2475 int64_t RegLo, RegHi; 2476 if (!skipToken(AsmToken::LBrac, "missing register index")) 2477 return false; 2478 2479 SMLoc FirstIdxLoc = getLoc(); 2480 SMLoc SecondIdxLoc; 2481 2482 if (!parseExpr(RegLo)) 2483 return false; 2484 2485 if (trySkipToken(AsmToken::Colon)) { 2486 SecondIdxLoc = getLoc(); 2487 if (!parseExpr(RegHi)) 2488 return false; 2489 } else { 2490 RegHi = RegLo; 2491 } 2492 2493 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2494 return false; 2495 2496 if (!isUInt<32>(RegLo)) { 2497 Error(FirstIdxLoc, "invalid register index"); 2498 return false; 2499 } 2500 2501 if (!isUInt<32>(RegHi)) { 2502 Error(SecondIdxLoc, "invalid register index"); 2503 return false; 2504 } 2505 2506 if (RegLo > RegHi) { 2507 Error(FirstIdxLoc, "first register index should not exceed second index"); 2508 return false; 2509 } 2510 2511 Num = static_cast<unsigned>(RegLo); 2512 Width = (RegHi - RegLo) + 1; 2513 return true; 2514 } 2515 2516 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2517 unsigned &RegNum, unsigned &RegWidth, 2518 SmallVectorImpl<AsmToken> &Tokens) { 2519 assert(isToken(AsmToken::Identifier)); 2520 unsigned Reg = getSpecialRegForName(getTokenStr()); 2521 if (Reg) { 2522 RegNum = 0; 2523 RegWidth = 1; 2524 RegKind = IS_SPECIAL; 2525 Tokens.push_back(getToken()); 2526 lex(); // skip register name 2527 } 2528 return Reg; 2529 } 2530 2531 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2532 unsigned &RegNum, unsigned &RegWidth, 2533 SmallVectorImpl<AsmToken> &Tokens) { 2534 assert(isToken(AsmToken::Identifier)); 2535 StringRef RegName = getTokenStr(); 2536 auto Loc = getLoc(); 2537 2538 const RegInfo *RI = getRegularRegInfo(RegName); 2539 if (!RI) { 2540 Error(Loc, "invalid register name"); 2541 return AMDGPU::NoRegister; 2542 } 2543 2544 Tokens.push_back(getToken()); 2545 lex(); // skip register name 2546 2547 RegKind = RI->Kind; 2548 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2549 if (!RegSuffix.empty()) { 2550 // Single 32-bit register: vXX. 2551 if (!getRegNum(RegSuffix, RegNum)) { 2552 Error(Loc, "invalid register index"); 2553 return AMDGPU::NoRegister; 2554 } 2555 RegWidth = 1; 2556 } else { 2557 // Range of registers: v[XX:YY]. ":YY" is optional. 
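    // e.g. "v[8:11]" gives RegNum = 8 and RegWidth = 4; "v[5]" is a
    // single register, equivalent to "v5".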
2558 if (!ParseRegRange(RegNum, RegWidth)) 2559 return AMDGPU::NoRegister; 2560 } 2561 2562 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2563 } 2564 2565 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2566 unsigned &RegWidth, 2567 SmallVectorImpl<AsmToken> &Tokens) { 2568 unsigned Reg = AMDGPU::NoRegister; 2569 auto ListLoc = getLoc(); 2570 2571 if (!skipToken(AsmToken::LBrac, 2572 "expected a register or a list of registers")) { 2573 return AMDGPU::NoRegister; 2574 } 2575 2576 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2577 2578 auto Loc = getLoc(); 2579 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2580 return AMDGPU::NoRegister; 2581 if (RegWidth != 1) { 2582 Error(Loc, "expected a single 32-bit register"); 2583 return AMDGPU::NoRegister; 2584 } 2585 2586 for (; trySkipToken(AsmToken::Comma); ) { 2587 RegisterKind NextRegKind; 2588 unsigned NextReg, NextRegNum, NextRegWidth; 2589 Loc = getLoc(); 2590 2591 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2592 NextRegNum, NextRegWidth, 2593 Tokens)) { 2594 return AMDGPU::NoRegister; 2595 } 2596 if (NextRegWidth != 1) { 2597 Error(Loc, "expected a single 32-bit register"); 2598 return AMDGPU::NoRegister; 2599 } 2600 if (NextRegKind != RegKind) { 2601 Error(Loc, "registers in a list must be of the same kind"); 2602 return AMDGPU::NoRegister; 2603 } 2604 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2605 return AMDGPU::NoRegister; 2606 } 2607 2608 if (!skipToken(AsmToken::RBrac, 2609 "expected a comma or a closing square bracket")) { 2610 return AMDGPU::NoRegister; 2611 } 2612 2613 if (isRegularReg(RegKind)) 2614 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2615 2616 return Reg; 2617 } 2618 2619 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2620 unsigned &RegNum, unsigned &RegWidth, 2621 SmallVectorImpl<AsmToken> &Tokens) { 2622 auto Loc = getLoc(); 2623 Reg = AMDGPU::NoRegister; 2624 2625 if (isToken(AsmToken::Identifier)) { 2626 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2627 if (Reg == AMDGPU::NoRegister) 2628 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2629 } else { 2630 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2631 } 2632 2633 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2634 if (Reg == AMDGPU::NoRegister) { 2635 assert(Parser.hasPendingError()); 2636 return false; 2637 } 2638 2639 if (!subtargetHasRegister(*TRI, Reg)) { 2640 if (Reg == AMDGPU::SGPR_NULL) { 2641 Error(Loc, "'null' operand is not supported on this GPU"); 2642 } else { 2643 Error(Loc, "register not available on this GPU"); 2644 } 2645 return false; 2646 } 2647 2648 return true; 2649 } 2650 2651 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2652 unsigned &RegNum, unsigned &RegWidth, 2653 bool RestoreOnFailure /*=false*/) { 2654 Reg = AMDGPU::NoRegister; 2655 2656 SmallVector<AsmToken, 1> Tokens; 2657 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2658 if (RestoreOnFailure) { 2659 while (!Tokens.empty()) { 2660 getLexer().UnLex(Tokens.pop_back_val()); 2661 } 2662 } 2663 return true; 2664 } 2665 return false; 2666 } 2667 2668 Optional<StringRef> 2669 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2670 switch (RegKind) { 2671 case IS_VGPR: 2672 return StringRef(".amdgcn.next_free_vgpr"); 2673 case IS_SGPR: 2674 return StringRef(".amdgcn.next_free_sgpr"); 2675 default: 2676 return None; 2677 } 2678 } 2679 2680 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2681 auto SymbolName = getGprCountSymbolName(RegKind); 2682 assert(SymbolName && "initializing invalid register kind"); 2683 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2684 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2685 } 2686 2687 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2688 unsigned DwordRegIndex, 2689 unsigned RegWidth) { 2690 // Symbols are only defined for GCN targets 2691 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2692 return true; 2693 2694 auto SymbolName = getGprCountSymbolName(RegKind); 2695 if (!SymbolName) 2696 return true; 2697 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2698 2699 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2700 int64_t OldCount; 2701 2702 if (!Sym->isVariable()) 2703 return !Error(getLoc(), 2704 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2705 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2706 return !Error( 2707 getLoc(), 2708 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2709 2710 if (OldCount <= NewMax) 2711 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2712 2713 return true; 2714 } 2715 2716 std::unique_ptr<AMDGPUOperand> 2717 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2718 const auto &Tok = getToken(); 2719 SMLoc StartLoc = Tok.getLoc(); 2720 SMLoc EndLoc = Tok.getEndLoc(); 2721 RegisterKind RegKind; 2722 unsigned Reg, RegNum, RegWidth; 2723 2724 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2725 return nullptr; 2726 } 2727 if (isHsaAbiVersion3(&getSTI())) { 2728 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2729 return nullptr; 2730 } else 2731 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2732 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2733 } 2734 2735 OperandMatchResultTy 2736 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2737 // TODO: add syntactic sugar for 1/(2*PI) 2738 2739 assert(!isRegister()); 2740 assert(!isModifier()); 2741 2742 const auto& Tok = getToken(); 2743 const auto& NextTok = peekToken(); 2744 bool IsReal = Tok.is(AsmToken::Real); 2745 SMLoc S = getLoc(); 2746 bool Negate = false; 2747 2748 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2749 lex(); 2750 IsReal = true; 2751 Negate = true; 2752 } 2753 2754 if (IsReal) { 2755 // Floating-point expressions are not supported. 2756 // Can only allow floating-point literals with an 2757 // optional sign. 2758 2759 StringRef Num = getTokenStr(); 2760 lex(); 2761 2762 APFloat RealVal(APFloat::IEEEdouble()); 2763 auto roundMode = APFloat::rmNearestTiesToEven; 2764 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2765 return MatchOperand_ParseFail; 2766 } 2767 if (Negate) 2768 RealVal.changeSign(); 2769 2770 Operands.push_back( 2771 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2772 AMDGPUOperand::ImmTyNone, true)); 2773 2774 return MatchOperand_Success; 2775 2776 } else { 2777 int64_t IntVal; 2778 const MCExpr *Expr; 2779 SMLoc S = getLoc(); 2780 2781 if (HasSP3AbsModifier) { 2782 // This is a workaround for handling expressions 2783 // as arguments of SP3 'abs' modifier, for example: 2784 // |1.0| 2785 // |-1| 2786 // |1+x| 2787 // This syntax is not compatible with syntax of standard 2788 // MC expressions (due to the trailing '|'). 
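      // parsePrimaryExpr is used instead of parseExpression so that the
      // closing '|' is not consumed as a binary operator.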
2789 SMLoc EndLoc; 2790 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2791 return MatchOperand_ParseFail; 2792 } else { 2793 if (Parser.parseExpression(Expr)) 2794 return MatchOperand_ParseFail; 2795 } 2796 2797 if (Expr->evaluateAsAbsolute(IntVal)) { 2798 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2799 } else { 2800 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2801 } 2802 2803 return MatchOperand_Success; 2804 } 2805 2806 return MatchOperand_NoMatch; 2807 } 2808 2809 OperandMatchResultTy 2810 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2811 if (!isRegister()) 2812 return MatchOperand_NoMatch; 2813 2814 if (auto R = parseRegister()) { 2815 assert(R->isReg()); 2816 Operands.push_back(std::move(R)); 2817 return MatchOperand_Success; 2818 } 2819 return MatchOperand_ParseFail; 2820 } 2821 2822 OperandMatchResultTy 2823 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2824 auto res = parseReg(Operands); 2825 if (res != MatchOperand_NoMatch) { 2826 return res; 2827 } else if (isModifier()) { 2828 return MatchOperand_NoMatch; 2829 } else { 2830 return parseImm(Operands, HasSP3AbsMod); 2831 } 2832 } 2833 2834 bool 2835 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2836 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2837 const auto &str = Token.getString(); 2838 return str == "abs" || str == "neg" || str == "sext"; 2839 } 2840 return false; 2841 } 2842 2843 bool 2844 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2845 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2846 } 2847 2848 bool 2849 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2850 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2851 } 2852 2853 bool 2854 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2855 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2856 } 2857 2858 // Check if this is an operand modifier or an opcode modifier 2859 // which may look like an expression but it is not. We should 2860 // avoid parsing these modifiers as expressions. Currently 2861 // recognized sequences are: 2862 // |...| 2863 // abs(...) 2864 // neg(...) 2865 // sext(...) 2866 // -reg 2867 // -|...| 2868 // -abs(...) 2869 // name:... 2870 // Note that simple opcode modifiers like 'gds' may be parsed as 2871 // expressions; this is a special case. See getExpressionAsToken. 2872 // 2873 bool 2874 AMDGPUAsmParser::isModifier() { 2875 2876 AsmToken Tok = getToken(); 2877 AsmToken NextToken[2]; 2878 peekTokens(NextToken); 2879 2880 return isOperandModifier(Tok, NextToken[0]) || 2881 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2882 isOpcodeModifierWithVal(Tok, NextToken[0]); 2883 } 2884 2885 // Check if the current token is an SP3 'neg' modifier. 2886 // Currently this modifier is allowed in the following context: 2887 // 2888 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2889 // 2. Before an 'abs' modifier: -abs(...) 2890 // 3. Before an SP3 'abs' modifier: -|...| 2891 // 2892 // In all other cases "-" is handled as a part 2893 // of an expression that follows the sign. 
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would result in a different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity.
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    return (SP3Neg || Neg || SP3Abs || Abs) ?
MatchOperand_ParseFail : Res; 2967 } 2968 2969 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2970 return MatchOperand_ParseFail; 2971 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2972 return MatchOperand_ParseFail; 2973 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2974 return MatchOperand_ParseFail; 2975 2976 AMDGPUOperand::Modifiers Mods; 2977 Mods.Abs = Abs || SP3Abs; 2978 Mods.Neg = Neg || SP3Neg; 2979 2980 if (Mods.hasFPModifiers()) { 2981 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2982 if (Op.isExpr()) { 2983 Error(Op.getStartLoc(), "expected an absolute expression"); 2984 return MatchOperand_ParseFail; 2985 } 2986 Op.setModifiers(Mods); 2987 } 2988 return MatchOperand_Success; 2989 } 2990 2991 OperandMatchResultTy 2992 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2993 bool AllowImm) { 2994 bool Sext = trySkipId("sext"); 2995 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2996 return MatchOperand_ParseFail; 2997 2998 OperandMatchResultTy Res; 2999 if (AllowImm) { 3000 Res = parseRegOrImm(Operands); 3001 } else { 3002 Res = parseReg(Operands); 3003 } 3004 if (Res != MatchOperand_Success) { 3005 return Sext? MatchOperand_ParseFail : Res; 3006 } 3007 3008 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 3009 return MatchOperand_ParseFail; 3010 3011 AMDGPUOperand::Modifiers Mods; 3012 Mods.Sext = Sext; 3013 3014 if (Mods.hasIntModifiers()) { 3015 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 3016 if (Op.isExpr()) { 3017 Error(Op.getStartLoc(), "expected an absolute expression"); 3018 return MatchOperand_ParseFail; 3019 } 3020 Op.setModifiers(Mods); 3021 } 3022 3023 return MatchOperand_Success; 3024 } 3025 3026 OperandMatchResultTy 3027 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 3028 return parseRegOrImmWithFPInputMods(Operands, false); 3029 } 3030 3031 OperandMatchResultTy 3032 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 3033 return parseRegOrImmWithIntInputMods(Operands, false); 3034 } 3035 3036 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 3037 auto Loc = getLoc(); 3038 if (trySkipId("off")) { 3039 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 3040 AMDGPUOperand::ImmTyOff, false)); 3041 return MatchOperand_Success; 3042 } 3043 3044 if (!isRegister()) 3045 return MatchOperand_NoMatch; 3046 3047 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 3048 if (Reg) { 3049 Operands.push_back(std::move(Reg)); 3050 return MatchOperand_Success; 3051 } 3052 3053 return MatchOperand_ParseFail; 3054 3055 } 3056 3057 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 3058 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3059 3060 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 3061 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 3062 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 3063 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 3064 return Match_InvalidOperand; 3065 3066 if ((TSFlags & SIInstrFlags::VOP3) && 3067 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 3068 getForcedEncodingSize() != 64) 3069 return Match_PreferE32; 3070 3071 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 3072 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 3073 // v_mac_f32/16 allow only dst_sel == DWORD; 3074 auto OpNum = 3075 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 3076 const auto &Op = Inst.getOperand(OpNum); 3077 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 3078 return Match_InvalidOperand; 3079 } 3080 } 3081 3082 return Match_Success; 3083 } 3084 3085 static ArrayRef<unsigned> getAllVariants() { 3086 static const unsigned Variants[] = { 3087 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 3088 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 3089 }; 3090 3091 return makeArrayRef(Variants); 3092 } 3093 3094 // What asm variants we should check 3095 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 3096 if (getForcedEncodingSize() == 32) { 3097 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 3098 return makeArrayRef(Variants); 3099 } 3100 3101 if (isForcedVOP3()) { 3102 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 3103 return makeArrayRef(Variants); 3104 } 3105 3106 if (isForcedSDWA()) { 3107 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 3108 AMDGPUAsmVariants::SDWA9}; 3109 return makeArrayRef(Variants); 3110 } 3111 3112 if (isForcedDPP()) { 3113 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 3114 return makeArrayRef(Variants); 3115 } 3116 3117 return getAllVariants(); 3118 } 3119 3120 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 3121 if (getForcedEncodingSize() == 32) 3122 return "e32"; 3123 3124 if (isForcedVOP3()) 3125 return "e64"; 3126 3127 if (isForcedSDWA()) 3128 return "sdwa"; 3129 3130 if (isForcedDPP()) 3131 return "dpp"; 3132 3133 return ""; 3134 } 3135 3136 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 3137 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3138 const unsigned Num = Desc.getNumImplicitUses(); 3139 for (unsigned i = 0; i < Num; ++i) { 3140 unsigned Reg = Desc.ImplicitUses[i]; 3141 switch (Reg) { 3142 case AMDGPU::FLAT_SCR: 3143 case AMDGPU::VCC: 3144 case AMDGPU::VCC_LO: 3145 case AMDGPU::VCC_HI: 3146 case AMDGPU::M0: 3147 return Reg; 3148 default: 3149 break; 3150 } 3151 } 3152 return AMDGPU::NoRegister; 3153 } 3154 3155 // NB: This code is correct only when used to check constant 3156 // bus limitations because GFX7 support no f16 inline constants. 3157 // Note that there are no cases when a GFX7 opcode violates 3158 // constant bus limitations due to the use of an f16 constant. 
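// Callers are expected to have checked that the operand is an immediate
// (see usesConstantBus below) before querying this predicate.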
3159 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 3160 unsigned OpIdx) const { 3161 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 3162 3163 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3164 return false; 3165 } 3166 3167 const MCOperand &MO = Inst.getOperand(OpIdx); 3168 3169 int64_t Val = MO.getImm(); 3170 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3171 3172 switch (OpSize) { // expected operand size 3173 case 8: 3174 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3175 case 4: 3176 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3177 case 2: { 3178 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3179 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3180 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3181 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3182 return AMDGPU::isInlinableIntLiteral(Val); 3183 3184 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3185 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3186 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3187 return AMDGPU::isInlinableIntLiteralV216(Val); 3188 3189 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3190 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3191 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3192 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3193 3194 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3195 } 3196 default: 3197 llvm_unreachable("invalid operand size"); 3198 } 3199 } 3200 3201 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3202 if (!isGFX10Plus()) 3203 return 1; 3204 3205 switch (Opcode) { 3206 // 64-bit shift instructions can use only one scalar value input 3207 case AMDGPU::V_LSHLREV_B64_e64: 3208 case AMDGPU::V_LSHLREV_B64_gfx10: 3209 case AMDGPU::V_LSHRREV_B64_e64: 3210 case AMDGPU::V_LSHRREV_B64_gfx10: 3211 case AMDGPU::V_ASHRREV_I64_e64: 3212 case AMDGPU::V_ASHRREV_I64_gfx10: 3213 case AMDGPU::V_LSHL_B64_e64: 3214 case AMDGPU::V_LSHR_B64_e64: 3215 case AMDGPU::V_ASHR_I64_e64: 3216 return 1; 3217 default: 3218 return 2; 3219 } 3220 } 3221 3222 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3223 const MCOperand &MO = Inst.getOperand(OpIdx); 3224 if (MO.isImm()) { 3225 return !isInlineConstant(Inst, OpIdx); 3226 } else if (MO.isReg()) { 3227 auto Reg = MO.getReg(); 3228 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3229 auto PReg = mc2PseudoReg(Reg); 3230 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3231 } else { 3232 return true; 3233 } 3234 } 3235 3236 bool 3237 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3238 const OperandVector &Operands) { 3239 const unsigned Opcode = Inst.getOpcode(); 3240 const MCInstrDesc &Desc = MII.get(Opcode); 3241 unsigned LastSGPR = AMDGPU::NoRegister; 3242 unsigned ConstantBusUseCount = 0; 3243 unsigned NumLiterals = 0; 3244 unsigned LiteralSize; 3245 3246 if (Desc.TSFlags & 3247 (SIInstrFlags::VOPC | 3248 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3249 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3250 SIInstrFlags::SDWA)) { 3251 // Check special imm operands (used by madmk, etc) 3252 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3253 ++ConstantBusUseCount; 3254 } 3255 3256 SmallDenseSet<unsigned> SGPRsUsed; 3257 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3258 if (SGPRUsed != AMDGPU::NoRegister) { 3259 SGPRsUsed.insert(SGPRUsed); 3260 ++ConstantBusUseCount; 3261 } 3262 3263 const int 
Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3264 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3265 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3266 3267 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3268 3269 for (int OpIdx : OpIndices) { 3270 if (OpIdx == -1) break; 3271 3272 const MCOperand &MO = Inst.getOperand(OpIdx); 3273 if (usesConstantBus(Inst, OpIdx)) { 3274 if (MO.isReg()) { 3275 LastSGPR = mc2PseudoReg(MO.getReg()); 3276 // Pairs of registers with a partial intersections like these 3277 // s0, s[0:1] 3278 // flat_scratch_lo, flat_scratch 3279 // flat_scratch_lo, flat_scratch_hi 3280 // are theoretically valid but they are disabled anyway. 3281 // Note that this code mimics SIInstrInfo::verifyInstruction 3282 if (!SGPRsUsed.count(LastSGPR)) { 3283 SGPRsUsed.insert(LastSGPR); 3284 ++ConstantBusUseCount; 3285 } 3286 } else { // Expression or a literal 3287 3288 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3289 continue; // special operand like VINTERP attr_chan 3290 3291 // An instruction may use only one literal. 3292 // This has been validated on the previous step. 3293 // See validateVOP3Literal. 3294 // This literal may be used as more than one operand. 3295 // If all these operands are of the same size, 3296 // this literal counts as one scalar value. 3297 // Otherwise it counts as 2 scalar values. 3298 // See "GFX10 Shader Programming", section 3.6.2.3. 3299 3300 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3301 if (Size < 4) Size = 4; 3302 3303 if (NumLiterals == 0) { 3304 NumLiterals = 1; 3305 LiteralSize = Size; 3306 } else if (LiteralSize != Size) { 3307 NumLiterals = 2; 3308 } 3309 } 3310 } 3311 } 3312 } 3313 ConstantBusUseCount += NumLiterals; 3314 3315 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3316 return true; 3317 3318 SMLoc LitLoc = getLitLoc(Operands); 3319 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3320 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3321 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3322 return false; 3323 } 3324 3325 bool 3326 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3327 const OperandVector &Operands) { 3328 const unsigned Opcode = Inst.getOpcode(); 3329 const MCInstrDesc &Desc = MII.get(Opcode); 3330 3331 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3332 if (DstIdx == -1 || 3333 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3334 return true; 3335 } 3336 3337 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3338 3339 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3340 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3341 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3342 3343 assert(DstIdx != -1); 3344 const MCOperand &Dst = Inst.getOperand(DstIdx); 3345 assert(Dst.isReg()); 3346 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3347 3348 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3349 3350 for (int SrcIdx : SrcIndices) { 3351 if (SrcIdx == -1) break; 3352 const MCOperand &Src = Inst.getOperand(SrcIdx); 3353 if (Src.isReg()) { 3354 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3355 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3356 Error(getRegLoc(SrcReg, Operands), 3357 "destination must be different than all sources"); 3358 return false; 3359 } 3360 } 3361 } 3362 3363 return true; 3364 } 3365 3366 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3367 3368 const unsigned Opc = Inst.getOpcode(); 3369 const MCInstrDesc &Desc = MII.get(Opc); 3370 3371 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3372 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3373 assert(ClampIdx != -1); 3374 return Inst.getOperand(ClampIdx).getImm() == 0; 3375 } 3376 3377 return true; 3378 } 3379 3380 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3381 3382 const unsigned Opc = Inst.getOpcode(); 3383 const MCInstrDesc &Desc = MII.get(Opc); 3384 3385 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3386 return true; 3387 3388 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3389 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3390 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3391 3392 assert(VDataIdx != -1); 3393 3394 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3395 return true; 3396 3397 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3398 unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0; 3399 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3400 if (DMask == 0) 3401 DMask = 1; 3402 3403 unsigned DataSize = 3404 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3405 if (hasPackedD16()) { 3406 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3407 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3408 DataSize = (DataSize + 1) / 2; 3409 } 3410 3411 return (VDataSize / 4) == DataSize + TFESize; 3412 } 3413 3414 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3415 const unsigned Opc = Inst.getOpcode(); 3416 const MCInstrDesc &Desc = MII.get(Opc); 3417 3418 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3419 return true; 3420 3421 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3422 3423 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3424 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3425 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3426 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3427 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3428 3429 assert(VAddr0Idx != -1); 3430 assert(SrsrcIdx != -1); 3431 assert(SrsrcIdx > VAddr0Idx); 3432 3433 if (DimIdx == -1) 3434 return true; // intersect_ray 3435 3436 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3437 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3438 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3439 unsigned VAddrSize = 3440 IsNSA ? SrsrcIdx - VAddr0Idx 3441 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3442 3443 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3444 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3445 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3446 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3447 if (!IsNSA) { 3448 if (AddrSize > 8) 3449 AddrSize = 16; 3450 else if (AddrSize > 4) 3451 AddrSize = 8; 3452 } 3453 3454 return VAddrSize == AddrSize; 3455 } 3456 3457 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3458 3459 const unsigned Opc = Inst.getOpcode(); 3460 const MCInstrDesc &Desc = MII.get(Opc); 3461 3462 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3463 return true; 3464 if (!Desc.mayLoad() || !Desc.mayStore()) 3465 return true; // Not atomic 3466 3467 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3468 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3469 3470 // This is an incomplete check because image_atomic_cmpswap 3471 // may only use 0x3 and 0xf while other atomic operations 3472 // may use 0x1 and 0x3. However these limitations are 3473 // verified when we check that dmask matches dst size. 3474 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3475 } 3476 3477 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3478 3479 const unsigned Opc = Inst.getOpcode(); 3480 const MCInstrDesc &Desc = MII.get(Opc); 3481 3482 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3483 return true; 3484 3485 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3486 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3487 3488 // GATHER4 instructions use dmask in a different fashion compared to 3489 // other MIMG instructions. The only useful DMASK values are 3490 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3491 // (red,red,red,red) etc.) The ISA document doesn't mention 3492 // this. 
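  // For example, "dmask:0x4" on an image_gather4 instruction selects the
  // blue channel and is accepted; "dmask:0x3" is rejected here.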
3493 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3494 } 3495 3496 bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) { 3497 const unsigned Opc = Inst.getOpcode(); 3498 const MCInstrDesc &Desc = MII.get(Opc); 3499 3500 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3501 return true; 3502 3503 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3504 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3505 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3506 3507 if (!BaseOpcode->MSAA) 3508 return true; 3509 3510 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3511 assert(DimIdx != -1); 3512 3513 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3514 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3515 3516 return DimInfo->MSAA; 3517 } 3518 3519 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3520 { 3521 switch (Opcode) { 3522 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3523 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3524 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3525 return true; 3526 default: 3527 return false; 3528 } 3529 } 3530 3531 // movrels* opcodes should only allow VGPRS as src0. 3532 // This is specified in .td description for vop1/vop3, 3533 // but sdwa is handled differently. See isSDWAOperand. 3534 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3535 const OperandVector &Operands) { 3536 3537 const unsigned Opc = Inst.getOpcode(); 3538 const MCInstrDesc &Desc = MII.get(Opc); 3539 3540 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3541 return true; 3542 3543 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3544 assert(Src0Idx != -1); 3545 3546 SMLoc ErrLoc; 3547 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3548 if (Src0.isReg()) { 3549 auto Reg = mc2PseudoReg(Src0.getReg()); 3550 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3551 if (!isSGPR(Reg, TRI)) 3552 return true; 3553 ErrLoc = getRegLoc(Reg, Operands); 3554 } else { 3555 ErrLoc = getConstLoc(Operands); 3556 } 3557 3558 Error(ErrLoc, "source operand must be a VGPR"); 3559 return false; 3560 } 3561 3562 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3563 const OperandVector &Operands) { 3564 3565 const unsigned Opc = Inst.getOpcode(); 3566 3567 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3568 return true; 3569 3570 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3571 assert(Src0Idx != -1); 3572 3573 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3574 if (!Src0.isReg()) 3575 return true; 3576 3577 auto Reg = mc2PseudoReg(Src0.getReg()); 3578 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3579 if (isSGPR(Reg, TRI)) { 3580 Error(getRegLoc(Reg, Operands), 3581 "source operand must be either a VGPR or an inline constant"); 3582 return false; 3583 } 3584 3585 return true; 3586 } 3587 3588 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3589 switch (Inst.getOpcode()) { 3590 default: 3591 return true; 3592 case V_DIV_SCALE_F32_gfx6_gfx7: 3593 case V_DIV_SCALE_F32_vi: 3594 case V_DIV_SCALE_F32_gfx10: 3595 case V_DIV_SCALE_F64_gfx6_gfx7: 3596 case V_DIV_SCALE_F64_vi: 3597 case V_DIV_SCALE_F64_gfx10: 3598 break; 3599 } 3600 3601 // TODO: Check that src0 = src1 or src2. 
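  // The (SP3) abs modifier is not supported on any source of v_div_scale_*;
  // reject the instruction if any src*_modifiers operand has ABS set.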
3602
3603 for (auto Name : {AMDGPU::OpName::src0_modifiers,
3604 AMDGPU::OpName::src1_modifiers,
3605 AMDGPU::OpName::src2_modifiers}) {
3606 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
3607 .getImm() &
3608 SISrcMods::ABS) {
3609 return false;
3610 }
3611 }
3612
3613 return true;
3614 }
3615
3616 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
3617
3618 const unsigned Opc = Inst.getOpcode();
3619 const MCInstrDesc &Desc = MII.get(Opc);
3620
3621 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3622 return true;
3623
3624 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
3625 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
3626 if (isCI() || isSI())
3627 return false;
3628 }
3629
3630 return true;
3631 }
3632
3633 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
3634 const unsigned Opc = Inst.getOpcode();
3635 const MCInstrDesc &Desc = MII.get(Opc);
3636
3637 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
3638 return true;
3639
3640 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
3641 if (DimIdx < 0)
3642 return true;
3643
3644 long Imm = Inst.getOperand(DimIdx).getImm();
3645 if (Imm < 0 || Imm >= 8)
3646 return false;
3647
3648 return true;
3649 }
3650
3651 static bool IsRevOpcode(const unsigned Opcode)
3652 {
3653 switch (Opcode) {
3654 case AMDGPU::V_SUBREV_F32_e32:
3655 case AMDGPU::V_SUBREV_F32_e64:
3656 case AMDGPU::V_SUBREV_F32_e32_gfx10:
3657 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
3658 case AMDGPU::V_SUBREV_F32_e32_vi:
3659 case AMDGPU::V_SUBREV_F32_e64_gfx10:
3660 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
3661 case AMDGPU::V_SUBREV_F32_e64_vi:
3662
3663 case AMDGPU::V_SUBREV_CO_U32_e32:
3664 case AMDGPU::V_SUBREV_CO_U32_e64:
3665 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
3666 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:
3667
3668 case AMDGPU::V_SUBBREV_U32_e32:
3669 case AMDGPU::V_SUBBREV_U32_e64:
3670 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
3671 case AMDGPU::V_SUBBREV_U32_e32_vi:
3672 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
3673 case AMDGPU::V_SUBBREV_U32_e64_vi:
3674
3675 case AMDGPU::V_SUBREV_U32_e32:
3676 case AMDGPU::V_SUBREV_U32_e64:
3677 case AMDGPU::V_SUBREV_U32_e32_gfx9:
3678 case AMDGPU::V_SUBREV_U32_e32_vi:
3679 case AMDGPU::V_SUBREV_U32_e64_gfx9:
3680 case AMDGPU::V_SUBREV_U32_e64_vi:
3681
3682 case AMDGPU::V_SUBREV_F16_e32:
3683 case AMDGPU::V_SUBREV_F16_e64:
3684 case AMDGPU::V_SUBREV_F16_e32_gfx10:
3685 case AMDGPU::V_SUBREV_F16_e32_vi:
3686 case AMDGPU::V_SUBREV_F16_e64_gfx10:
3687 case AMDGPU::V_SUBREV_F16_e64_vi:
3688
3689 case AMDGPU::V_SUBREV_U16_e32:
3690 case AMDGPU::V_SUBREV_U16_e64:
3691 case AMDGPU::V_SUBREV_U16_e32_vi:
3692 case AMDGPU::V_SUBREV_U16_e64_vi:
3693
3694 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
3695 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
3696 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:
3697
3698 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
3699 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:
3700
3701 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
3702 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:
3703
3704 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
3705 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:
3706
3707 case AMDGPU::V_LSHRREV_B32_e32:
3708 case AMDGPU::V_LSHRREV_B32_e64:
3709 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
3710 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
3711 case AMDGPU::V_LSHRREV_B32_e32_vi:
3712 case AMDGPU::V_LSHRREV_B32_e64_vi:
3713 case AMDGPU::V_LSHRREV_B32_e32_gfx10:
3714 case AMDGPU::V_LSHRREV_B32_e64_gfx10:
3715
3716 case AMDGPU::V_ASHRREV_I32_e32:
3717 case
AMDGPU::V_ASHRREV_I32_e64: 3718 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3719 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3720 case AMDGPU::V_ASHRREV_I32_e32_vi: 3721 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3722 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3723 case AMDGPU::V_ASHRREV_I32_e64_vi: 3724 3725 case AMDGPU::V_LSHLREV_B32_e32: 3726 case AMDGPU::V_LSHLREV_B32_e64: 3727 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3728 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3729 case AMDGPU::V_LSHLREV_B32_e32_vi: 3730 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3731 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3732 case AMDGPU::V_LSHLREV_B32_e64_vi: 3733 3734 case AMDGPU::V_LSHLREV_B16_e32: 3735 case AMDGPU::V_LSHLREV_B16_e64: 3736 case AMDGPU::V_LSHLREV_B16_e32_vi: 3737 case AMDGPU::V_LSHLREV_B16_e64_vi: 3738 case AMDGPU::V_LSHLREV_B16_gfx10: 3739 3740 case AMDGPU::V_LSHRREV_B16_e32: 3741 case AMDGPU::V_LSHRREV_B16_e64: 3742 case AMDGPU::V_LSHRREV_B16_e32_vi: 3743 case AMDGPU::V_LSHRREV_B16_e64_vi: 3744 case AMDGPU::V_LSHRREV_B16_gfx10: 3745 3746 case AMDGPU::V_ASHRREV_I16_e32: 3747 case AMDGPU::V_ASHRREV_I16_e64: 3748 case AMDGPU::V_ASHRREV_I16_e32_vi: 3749 case AMDGPU::V_ASHRREV_I16_e64_vi: 3750 case AMDGPU::V_ASHRREV_I16_gfx10: 3751 3752 case AMDGPU::V_LSHLREV_B64_e64: 3753 case AMDGPU::V_LSHLREV_B64_gfx10: 3754 case AMDGPU::V_LSHLREV_B64_vi: 3755 3756 case AMDGPU::V_LSHRREV_B64_e64: 3757 case AMDGPU::V_LSHRREV_B64_gfx10: 3758 case AMDGPU::V_LSHRREV_B64_vi: 3759 3760 case AMDGPU::V_ASHRREV_I64_e64: 3761 case AMDGPU::V_ASHRREV_I64_gfx10: 3762 case AMDGPU::V_ASHRREV_I64_vi: 3763 3764 case AMDGPU::V_PK_LSHLREV_B16: 3765 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3766 case AMDGPU::V_PK_LSHLREV_B16_vi: 3767 3768 case AMDGPU::V_PK_LSHRREV_B16: 3769 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3770 case AMDGPU::V_PK_LSHRREV_B16_vi: 3771 case AMDGPU::V_PK_ASHRREV_I16: 3772 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3773 case AMDGPU::V_PK_ASHRREV_I16_vi: 3774 return true; 3775 default: 3776 return false; 3777 } 3778 } 3779 3780 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3781 3782 using namespace SIInstrFlags; 3783 const unsigned Opcode = Inst.getOpcode(); 3784 const MCInstrDesc &Desc = MII.get(Opcode); 3785 3786 // lds_direct register is defined so that it can be used 3787 // with 9-bit operands only. Ignore encodings which do not accept these. 3788 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3789 return true; 3790 3791 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3792 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3793 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3794 3795 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3796 3797 // lds_direct cannot be specified as either src1 or src2. 3798 for (int SrcIdx : SrcIndices) { 3799 if (SrcIdx == -1) break; 3800 const MCOperand &Src = Inst.getOperand(SrcIdx); 3801 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3802 return false; 3803 } 3804 } 3805 3806 if (Src0Idx == -1) 3807 return true; 3808 3809 const MCOperand &Src = Inst.getOperand(Src0Idx); 3810 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3811 return true; 3812 3813 // lds_direct is specified as src0. Check additional limitations. 
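  // lds_direct in src0 is further restricted: it is rejected for SDWA forms
  // and for the "reversed" opcodes enumerated in IsRevOpcode() above (e.g.
  // v_subrev_f32, v_lshlrev_b32), whose source operands are swapped relative
  // to their non-rev counterparts.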
3814 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3815 } 3816 3817 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3818 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3819 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3820 if (Op.isFlatOffset()) 3821 return Op.getStartLoc(); 3822 } 3823 return getLoc(); 3824 } 3825 3826 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3827 const OperandVector &Operands) { 3828 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3829 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3830 return true; 3831 3832 auto Opcode = Inst.getOpcode(); 3833 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3834 assert(OpNum != -1); 3835 3836 const auto &Op = Inst.getOperand(OpNum); 3837 if (!hasFlatOffsets() && Op.getImm() != 0) { 3838 Error(getFlatOffsetLoc(Operands), 3839 "flat offset modifier is not supported on this GPU"); 3840 return false; 3841 } 3842 3843 // For FLAT segment the offset must be positive; 3844 // MSB is ignored and forced to zero. 3845 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3846 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true); 3847 if (!isIntN(OffsetSize, Op.getImm())) { 3848 Error(getFlatOffsetLoc(Operands), 3849 Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset"); 3850 return false; 3851 } 3852 } else { 3853 unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false); 3854 if (!isUIntN(OffsetSize, Op.getImm())) { 3855 Error(getFlatOffsetLoc(Operands), 3856 Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset"); 3857 return false; 3858 } 3859 } 3860 3861 return true; 3862 } 3863 3864 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3865 // Start with second operand because SMEM Offset cannot be dst or src0. 3866 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3867 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3868 if (Op.isSMEMOffset()) 3869 return Op.getStartLoc(); 3870 } 3871 return getLoc(); 3872 } 3873 3874 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3875 const OperandVector &Operands) { 3876 if (isCI() || isSI()) 3877 return true; 3878 3879 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3880 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3881 return true; 3882 3883 auto Opcode = Inst.getOpcode(); 3884 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3885 if (OpNum == -1) 3886 return true; 3887 3888 const auto &Op = Inst.getOperand(OpNum); 3889 if (!Op.isImm()) 3890 return true; 3891 3892 uint64_t Offset = Op.getImm(); 3893 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3894 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3895 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3896 return true; 3897 3898 Error(getSMEMOffsetLoc(Operands), 3899 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3900 "expected a 21-bit signed offset"); 3901 3902 return false; 3903 } 3904 3905 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3906 unsigned Opcode = Inst.getOpcode(); 3907 const MCInstrDesc &Desc = MII.get(Opcode); 3908 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3909 return true; 3910 3911 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3912 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3913 3914 const int OpIndices[] = { Src0Idx, Src1Idx }; 3915 3916 unsigned NumExprs = 0; 3917 unsigned NumLiterals = 0; 3918 uint32_t LiteralValue; 3919 3920 for (int OpIdx : OpIndices) { 3921 if (OpIdx == -1) break; 3922 3923 const MCOperand &MO = Inst.getOperand(OpIdx); 3924 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3925 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3926 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3927 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3928 if (NumLiterals == 0 || LiteralValue != Value) { 3929 LiteralValue = Value; 3930 ++NumLiterals; 3931 } 3932 } else if (MO.isExpr()) { 3933 ++NumExprs; 3934 } 3935 } 3936 } 3937 3938 return NumLiterals + NumExprs <= 1; 3939 } 3940 3941 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3942 const unsigned Opc = Inst.getOpcode(); 3943 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3944 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3945 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3946 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3947 3948 if (OpSel & ~3) 3949 return false; 3950 } 3951 return true; 3952 } 3953 3954 // Check if VCC register matches wavefront size 3955 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3956 auto FB = getFeatureBits(); 3957 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3958 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3959 } 3960 3961 // VOP3 literal is only allowed in GFX10+ and only one can be used 3962 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3963 const OperandVector &Operands) { 3964 unsigned Opcode = Inst.getOpcode(); 3965 const MCInstrDesc &Desc = MII.get(Opcode); 3966 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3967 return true; 3968 3969 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3970 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3971 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3972 3973 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3974 3975 unsigned NumExprs = 0; 3976 unsigned NumLiterals = 0; 3977 uint32_t LiteralValue; 3978 3979 for (int OpIdx : OpIndices) { 3980 if (OpIdx == -1) break; 3981 3982 const MCOperand &MO = Inst.getOperand(OpIdx); 3983 if (!MO.isImm() && !MO.isExpr()) 3984 continue; 3985 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3986 continue; 3987 3988 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3989 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3990 Error(getConstLoc(Operands), 3991 "inline constants are not allowed for this operand"); 3992 return false; 3993 } 3994 3995 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3996 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3997 if (NumLiterals == 0 || LiteralValue != Value) { 3998 LiteralValue = Value; 3999 ++NumLiterals; 4000 } 4001 } else if (MO.isExpr()) { 4002 ++NumExprs; 4003 } 
4004 } 4005 NumLiterals += NumExprs; 4006 4007 if (!NumLiterals) 4008 return true; 4009 4010 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 4011 Error(getLitLoc(Operands), "literal operands are not supported"); 4012 return false; 4013 } 4014 4015 if (NumLiterals > 1) { 4016 Error(getLitLoc(Operands), "only one literal operand is allowed"); 4017 return false; 4018 } 4019 4020 return true; 4021 } 4022 4023 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR. 4024 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx, 4025 const MCRegisterInfo *MRI) { 4026 int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx); 4027 if (OpIdx < 0) 4028 return -1; 4029 4030 const MCOperand &Op = Inst.getOperand(OpIdx); 4031 if (!Op.isReg()) 4032 return -1; 4033 4034 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4035 auto Reg = Sub ? Sub : Op.getReg(); 4036 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4037 return AGRP32.contains(Reg) ? 1 : 0; 4038 } 4039 4040 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const { 4041 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 4042 if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF | 4043 SIInstrFlags::MTBUF | SIInstrFlags::MIMG | 4044 SIInstrFlags::DS)) == 0) 4045 return true; 4046 4047 uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0 4048 : AMDGPU::OpName::vdata; 4049 4050 const MCRegisterInfo *MRI = getMRI(); 4051 int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI); 4052 int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI); 4053 4054 if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) { 4055 int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI); 4056 if (Data2Areg >= 0 && Data2Areg != DataAreg) 4057 return false; 4058 } 4059 4060 auto FB = getFeatureBits(); 4061 if (FB[AMDGPU::FeatureGFX90AInsts]) { 4062 if (DataAreg < 0 || DstAreg < 0) 4063 return true; 4064 return DstAreg == DataAreg; 4065 } 4066 4067 return DstAreg < 1 && DataAreg < 1; 4068 } 4069 4070 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const { 4071 auto FB = getFeatureBits(); 4072 if (!FB[AMDGPU::FeatureGFX90AInsts]) 4073 return true; 4074 4075 const MCRegisterInfo *MRI = getMRI(); 4076 const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID); 4077 const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID); 4078 for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) { 4079 const MCOperand &Op = Inst.getOperand(I); 4080 if (!Op.isReg()) 4081 continue; 4082 4083 unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0); 4084 if (!Sub) 4085 continue; 4086 4087 if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1)) 4088 return false; 4089 if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1)) 4090 return false; 4091 } 4092 4093 return true; 4094 } 4095 4096 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 4097 const OperandVector &Operands, 4098 const SMLoc &IDLoc) { 4099 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 4100 AMDGPU::OpName::glc1); 4101 if (GLCPos != -1) { 4102 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 4103 // in the asm string, and the default value means it is not present. 
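  // Illustrative example (hypothetical operands): an atomic that returns its
  // previous value, e.g.
  //   buffer_atomic_swap v0, off, s[0:3], 0 glc
  // must spell out "glc"; if it is omitted, the operand keeps its default of
  // -1 and we diagnose it below.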
4104 if (Inst.getOperand(GLCPos).getImm() == -1) { 4105 Error(IDLoc, "instruction must use glc"); 4106 return false; 4107 } 4108 } 4109 4110 return true; 4111 } 4112 4113 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 4114 const SMLoc &IDLoc, 4115 const OperandVector &Operands) { 4116 if (!validateLdsDirect(Inst)) { 4117 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 4118 "invalid use of lds_direct"); 4119 return false; 4120 } 4121 if (!validateSOPLiteral(Inst)) { 4122 Error(getLitLoc(Operands), 4123 "only one literal operand is allowed"); 4124 return false; 4125 } 4126 if (!validateVOP3Literal(Inst, Operands)) { 4127 return false; 4128 } 4129 if (!validateConstantBusLimitations(Inst, Operands)) { 4130 return false; 4131 } 4132 if (!validateEarlyClobberLimitations(Inst, Operands)) { 4133 return false; 4134 } 4135 if (!validateIntClampSupported(Inst)) { 4136 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 4137 "integer clamping is not supported on this GPU"); 4138 return false; 4139 } 4140 if (!validateOpSel(Inst)) { 4141 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 4142 "invalid op_sel operand"); 4143 return false; 4144 } 4145 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 4146 if (!validateMIMGD16(Inst)) { 4147 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 4148 "d16 modifier is not supported on this GPU"); 4149 return false; 4150 } 4151 if (!validateMIMGDim(Inst)) { 4152 Error(IDLoc, "dim modifier is required on this GPU"); 4153 return false; 4154 } 4155 if (!validateMIMGMSAA(Inst)) { 4156 Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands), 4157 "invalid dim; must be MSAA type"); 4158 return false; 4159 } 4160 if (!validateMIMGDataSize(Inst)) { 4161 Error(IDLoc, 4162 "image data size does not match dmask and tfe"); 4163 return false; 4164 } 4165 if (!validateMIMGAddrSize(Inst)) { 4166 Error(IDLoc, 4167 "image address size does not match dim and a16"); 4168 return false; 4169 } 4170 if (!validateMIMGAtomicDMask(Inst)) { 4171 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4172 "invalid atomic image dmask"); 4173 return false; 4174 } 4175 if (!validateMIMGGatherDMask(Inst)) { 4176 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 4177 "invalid image_gather dmask: only one bit must be set"); 4178 return false; 4179 } 4180 if (!validateMovrels(Inst, Operands)) { 4181 return false; 4182 } 4183 if (!validateFlatOffset(Inst, Operands)) { 4184 return false; 4185 } 4186 if (!validateSMEMOffset(Inst, Operands)) { 4187 return false; 4188 } 4189 if (!validateMAIAccWrite(Inst, Operands)) { 4190 return false; 4191 } 4192 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4193 return false; 4194 } 4195 4196 if (!validateAGPRLdSt(Inst)) { 4197 Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts] 4198 ? 
"invalid register class: data and dst should be all VGPR or AGPR" 4199 : "invalid register class: agpr loads and stores not supported on this GPU" 4200 ); 4201 return false; 4202 } 4203 if (!validateVGPRAlign(Inst)) { 4204 Error(IDLoc, 4205 "invalid register class: vgpr tuples must be 64 bit aligned"); 4206 return false; 4207 } 4208 4209 if (!validateDivScale(Inst)) { 4210 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 4211 return false; 4212 } 4213 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 4214 return false; 4215 } 4216 4217 return true; 4218 } 4219 4220 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 4221 const FeatureBitset &FBS, 4222 unsigned VariantID = 0); 4223 4224 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 4225 const FeatureBitset &AvailableFeatures, 4226 unsigned VariantID); 4227 4228 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4229 const FeatureBitset &FBS) { 4230 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 4231 } 4232 4233 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 4234 const FeatureBitset &FBS, 4235 ArrayRef<unsigned> Variants) { 4236 for (auto Variant : Variants) { 4237 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 4238 return true; 4239 } 4240 4241 return false; 4242 } 4243 4244 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 4245 const SMLoc &IDLoc) { 4246 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 4247 4248 // Check if requested instruction variant is supported. 4249 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 4250 return false; 4251 4252 // This instruction is not supported. 4253 // Clear any other pending errors because they are no longer relevant. 4254 getParser().clearPendingErrors(); 4255 4256 // Requested instruction variant is not supported. 4257 // Check if any other variants are supported. 4258 StringRef VariantName = getMatchedVariantName(); 4259 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 4260 return Error(IDLoc, 4261 Twine(VariantName, 4262 " variant of this instruction is not supported")); 4263 } 4264 4265 // Finally check if this instruction is supported on any other GPU. 4266 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 4267 return Error(IDLoc, "instruction not supported on this GPU"); 4268 } 4269 4270 // Instruction not supported on any GPU. Probably a typo. 4271 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 4272 return Error(IDLoc, "invalid instruction" + Suggestion); 4273 } 4274 4275 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 4276 OperandVector &Operands, 4277 MCStreamer &Out, 4278 uint64_t &ErrorInfo, 4279 bool MatchingInlineAsm) { 4280 MCInst Inst; 4281 unsigned Result = Match_Success; 4282 for (auto Variant : getMatchedVariants()) { 4283 uint64_t EI; 4284 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 4285 Variant); 4286 // We order match statuses from least to most specific. 
We use most specific 4287 // status as resulting 4288 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 4289 if ((R == Match_Success) || 4290 (R == Match_PreferE32) || 4291 (R == Match_MissingFeature && Result != Match_PreferE32) || 4292 (R == Match_InvalidOperand && Result != Match_MissingFeature 4293 && Result != Match_PreferE32) || 4294 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4295 && Result != Match_MissingFeature 4296 && Result != Match_PreferE32)) { 4297 Result = R; 4298 ErrorInfo = EI; 4299 } 4300 if (R == Match_Success) 4301 break; 4302 } 4303 4304 if (Result == Match_Success) { 4305 if (!validateInstruction(Inst, IDLoc, Operands)) { 4306 return true; 4307 } 4308 Inst.setLoc(IDLoc); 4309 Out.emitInstruction(Inst, getSTI()); 4310 return false; 4311 } 4312 4313 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4314 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4315 return true; 4316 } 4317 4318 switch (Result) { 4319 default: break; 4320 case Match_MissingFeature: 4321 // It has been verified that the specified instruction 4322 // mnemonic is valid. A match was found but it requires 4323 // features which are not supported on this GPU. 4324 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4325 4326 case Match_InvalidOperand: { 4327 SMLoc ErrorLoc = IDLoc; 4328 if (ErrorInfo != ~0ULL) { 4329 if (ErrorInfo >= Operands.size()) { 4330 return Error(IDLoc, "too few operands for instruction"); 4331 } 4332 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4333 if (ErrorLoc == SMLoc()) 4334 ErrorLoc = IDLoc; 4335 } 4336 return Error(ErrorLoc, "invalid operand for instruction"); 4337 } 4338 4339 case Match_PreferE32: 4340 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4341 "should be encoded as e32"); 4342 case Match_MnemonicFail: 4343 llvm_unreachable("Invalid instructions should have been handled already"); 4344 } 4345 llvm_unreachable("Implement any new match types added!"); 4346 } 4347 4348 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4349 int64_t Tmp = -1; 4350 if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) { 4351 return true; 4352 } 4353 if (getParser().parseAbsoluteExpression(Tmp)) { 4354 return true; 4355 } 4356 Ret = static_cast<uint32_t>(Tmp); 4357 return false; 4358 } 4359 4360 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4361 uint32_t &Minor) { 4362 if (ParseAsAbsoluteExpression(Major)) 4363 return TokError("invalid major version"); 4364 4365 if (!trySkipToken(AsmToken::Comma)) 4366 return TokError("minor version number required, comma expected"); 4367 4368 if (ParseAsAbsoluteExpression(Minor)) 4369 return TokError("invalid minor version"); 4370 4371 return false; 4372 } 4373 4374 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4375 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4376 return TokError("directive only supported for amdgcn architecture"); 4377 4378 std::string Target; 4379 4380 SMLoc TargetStart = getLoc(); 4381 if (getParser().parseEscapedString(Target)) 4382 return true; 4383 SMRange TargetRange = SMRange(TargetStart, getLoc()); 4384 4385 std::string ExpectedTarget; 4386 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4387 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4388 4389 if (Target != ExpectedTargetOS.str()) 4390 return Error(TargetRange.Start, "target must match options", TargetRange); 4391 4392 
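  // Illustrative usage (the quoted string must match the ISA name the
  // subtarget itself would stream, e.g. something of the form
  // "amdgcn-amd-amdhsa--gfx908"):
  //   .amdgcn_target "amdgcn-amd-amdhsa--gfx908"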
getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4393 return false; 4394 } 4395 4396 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4397 return Error(Range.Start, "value out of range", Range); 4398 } 4399 4400 bool AMDGPUAsmParser::calculateGPRBlocks( 4401 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4402 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4403 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4404 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4405 // TODO(scott.linder): These calculations are duplicated from 4406 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 4407 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4408 4409 unsigned NumVGPRs = NextFreeVGPR; 4410 unsigned NumSGPRs = NextFreeSGPR; 4411 4412 if (Version.Major >= 10) 4413 NumSGPRs = 0; 4414 else { 4415 unsigned MaxAddressableNumSGPRs = 4416 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4417 4418 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4419 NumSGPRs > MaxAddressableNumSGPRs) 4420 return OutOfRangeError(SGPRRange); 4421 4422 NumSGPRs += 4423 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4424 4425 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4426 NumSGPRs > MaxAddressableNumSGPRs) 4427 return OutOfRangeError(SGPRRange); 4428 4429 if (Features.test(FeatureSGPRInitBug)) 4430 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4431 } 4432 4433 VGPRBlocks = 4434 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4435 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4436 4437 return false; 4438 } 4439 4440 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4441 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4442 return TokError("directive only supported for amdgcn architecture"); 4443 4444 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4445 return TokError("directive only supported for amdhsa OS"); 4446 4447 StringRef KernelName; 4448 if (getParser().parseIdentifier(KernelName)) 4449 return true; 4450 4451 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4452 4453 StringSet<> Seen; 4454 4455 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4456 4457 SMRange VGPRRange; 4458 uint64_t NextFreeVGPR = 0; 4459 uint64_t AccumOffset = 0; 4460 SMRange SGPRRange; 4461 uint64_t NextFreeSGPR = 0; 4462 unsigned UserSGPRCount = 0; 4463 bool ReserveVCC = true; 4464 bool ReserveFlatScr = true; 4465 bool ReserveXNACK = hasXNACK(); 4466 Optional<bool> EnableWavefrontSize32; 4467 4468 while (true) { 4469 while (trySkipToken(AsmToken::EndOfStatement)); 4470 4471 StringRef ID; 4472 SMRange IDRange = getTok().getLocRange(); 4473 if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel")) 4474 return true; 4475 4476 if (ID == ".end_amdhsa_kernel") 4477 break; 4478 4479 if (Seen.find(ID) != Seen.end()) 4480 return TokError(".amdhsa_ directives cannot be repeated"); 4481 Seen.insert(ID); 4482 4483 SMLoc ValStart = getLoc(); 4484 int64_t IVal; 4485 if (getParser().parseAbsoluteExpression(IVal)) 4486 return true; 4487 SMLoc ValEnd = getLoc(); 4488 SMRange ValRange = SMRange(ValStart, ValEnd); 4489 4490 if (IVal < 0) 4491 return OutOfRangeError(ValRange); 4492 4493 uint64_t Val = IVal; 4494 4495 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4496 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4497 return OutOfRangeError(RANGE); \ 4498 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4499 4500 if (ID == 
".amdhsa_group_segment_fixed_size") { 4501 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4502 return OutOfRangeError(ValRange); 4503 KD.group_segment_fixed_size = Val; 4504 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4505 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4506 return OutOfRangeError(ValRange); 4507 KD.private_segment_fixed_size = Val; 4508 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4509 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4510 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4511 Val, ValRange); 4512 if (Val) 4513 UserSGPRCount += 4; 4514 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4515 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4516 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4517 ValRange); 4518 if (Val) 4519 UserSGPRCount += 2; 4520 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4521 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4522 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4523 ValRange); 4524 if (Val) 4525 UserSGPRCount += 2; 4526 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4527 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4528 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4529 Val, ValRange); 4530 if (Val) 4531 UserSGPRCount += 2; 4532 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4533 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4534 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4535 ValRange); 4536 if (Val) 4537 UserSGPRCount += 2; 4538 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4539 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4540 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4541 ValRange); 4542 if (Val) 4543 UserSGPRCount += 2; 4544 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4545 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4546 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4547 Val, ValRange); 4548 if (Val) 4549 UserSGPRCount += 1; 4550 } else if (ID == ".amdhsa_wavefront_size32") { 4551 if (IVersion.Major < 10) 4552 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4553 EnableWavefrontSize32 = Val; 4554 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4555 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4556 Val, ValRange); 4557 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4558 PARSE_BITS_ENTRY( 4559 KD.compute_pgm_rsrc2, 4560 COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, 4561 ValRange); 4562 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4563 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4564 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4565 ValRange); 4566 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4567 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4568 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4569 ValRange); 4570 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4571 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4572 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4573 ValRange); 4574 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4575 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4576 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4577 ValRange); 4578 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4579 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4580 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4581 ValRange); 4582 } else if (ID == ".amdhsa_next_free_vgpr") { 4583 VGPRRange = ValRange; 4584 NextFreeVGPR = Val; 4585 } else if (ID == ".amdhsa_next_free_sgpr") { 4586 SGPRRange = 
ValRange; 4587 NextFreeSGPR = Val; 4588 } else if (ID == ".amdhsa_accum_offset") { 4589 if (!isGFX90A()) 4590 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4591 AccumOffset = Val; 4592 } else if (ID == ".amdhsa_reserve_vcc") { 4593 if (!isUInt<1>(Val)) 4594 return OutOfRangeError(ValRange); 4595 ReserveVCC = Val; 4596 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4597 if (IVersion.Major < 7) 4598 return Error(IDRange.Start, "directive requires gfx7+", IDRange); 4599 if (!isUInt<1>(Val)) 4600 return OutOfRangeError(ValRange); 4601 ReserveFlatScr = Val; 4602 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4603 if (IVersion.Major < 8) 4604 return Error(IDRange.Start, "directive requires gfx8+", IDRange); 4605 if (!isUInt<1>(Val)) 4606 return OutOfRangeError(ValRange); 4607 ReserveXNACK = Val; 4608 } else if (ID == ".amdhsa_float_round_mode_32") { 4609 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4610 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4611 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4612 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4613 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4614 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4615 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4616 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4617 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4618 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4619 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4620 ValRange); 4621 } else if (ID == ".amdhsa_dx10_clamp") { 4622 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4623 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4624 } else if (ID == ".amdhsa_ieee_mode") { 4625 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4626 Val, ValRange); 4627 } else if (ID == ".amdhsa_fp16_overflow") { 4628 if (IVersion.Major < 9) 4629 return Error(IDRange.Start, "directive requires gfx9+", IDRange); 4630 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4631 ValRange); 4632 } else if (ID == ".amdhsa_tg_split") { 4633 if (!isGFX90A()) 4634 return Error(IDRange.Start, "directive requires gfx90a+", IDRange); 4635 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val, 4636 ValRange); 4637 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4638 if (IVersion.Major < 10) 4639 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4640 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4641 ValRange); 4642 } else if (ID == ".amdhsa_memory_ordered") { 4643 if (IVersion.Major < 10) 4644 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4645 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4646 ValRange); 4647 } else if (ID == ".amdhsa_forward_progress") { 4648 if (IVersion.Major < 10) 4649 return Error(IDRange.Start, "directive requires gfx10+", IDRange); 4650 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4651 ValRange); 4652 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4653 PARSE_BITS_ENTRY( 4654 KD.compute_pgm_rsrc2, 4655 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4656 ValRange); 4657 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4658 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4659 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4660 Val, ValRange); 4661 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4662 PARSE_BITS_ENTRY( 4663 KD.compute_pgm_rsrc2, 4664 
COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4665 ValRange); 4666 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4667 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4668 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4669 Val, ValRange); 4670 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4671 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4672 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4673 Val, ValRange); 4674 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4675 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4676 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4677 Val, ValRange); 4678 } else if (ID == ".amdhsa_exception_int_div_zero") { 4679 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4680 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4681 Val, ValRange); 4682 } else { 4683 return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange); 4684 } 4685 4686 #undef PARSE_BITS_ENTRY 4687 } 4688 4689 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4690 return TokError(".amdhsa_next_free_vgpr directive is required"); 4691 4692 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4693 return TokError(".amdhsa_next_free_sgpr directive is required"); 4694 4695 unsigned VGPRBlocks; 4696 unsigned SGPRBlocks; 4697 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4698 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4699 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4700 SGPRBlocks)) 4701 return true; 4702 4703 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4704 VGPRBlocks)) 4705 return OutOfRangeError(VGPRRange); 4706 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4707 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4708 4709 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4710 SGPRBlocks)) 4711 return OutOfRangeError(SGPRRange); 4712 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4713 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4714 SGPRBlocks); 4715 4716 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4717 return TokError("too many user SGPRs enabled"); 4718 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4719 UserSGPRCount); 4720 4721 if (isGFX90A()) { 4722 if (Seen.find(".amdhsa_accum_offset") == Seen.end()) 4723 return TokError(".amdhsa_accum_offset directive is required"); 4724 if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3)) 4725 return TokError("accum_offset should be in range [4..256] in " 4726 "increments of 4"); 4727 if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4)) 4728 return TokError("accum_offset exceeds total VGPR allocation"); 4729 AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET, 4730 (AccumOffset / 4 - 1)); 4731 } 4732 4733 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4734 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4735 ReserveFlatScr, ReserveXNACK); 4736 return false; 4737 } 4738 4739 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4740 uint32_t Major; 4741 uint32_t Minor; 4742 4743 if (ParseDirectiveMajorMinor(Major, Minor)) 4744 return true; 4745 4746 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4747 return false; 4748 } 4749 4750 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4751 uint32_t Major; 4752 uint32_t Minor; 4753 uint32_t Stepping; 4754 StringRef VendorName; 4755 StringRef ArchName; 4756 4757 // If this directive has no arguments, then use 
the ISA version for the 4758 // targeted GPU. 4759 if (isToken(AsmToken::EndOfStatement)) { 4760 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4761 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4762 ISA.Stepping, 4763 "AMD", "AMDGPU"); 4764 return false; 4765 } 4766 4767 if (ParseDirectiveMajorMinor(Major, Minor)) 4768 return true; 4769 4770 if (!trySkipToken(AsmToken::Comma)) 4771 return TokError("stepping version number required, comma expected"); 4772 4773 if (ParseAsAbsoluteExpression(Stepping)) 4774 return TokError("invalid stepping version"); 4775 4776 if (!trySkipToken(AsmToken::Comma)) 4777 return TokError("vendor name required, comma expected"); 4778 4779 if (!parseString(VendorName, "invalid vendor name")) 4780 return true; 4781 4782 if (!trySkipToken(AsmToken::Comma)) 4783 return TokError("arch name required, comma expected"); 4784 4785 if (!parseString(ArchName, "invalid arch name")) 4786 return true; 4787 4788 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4789 VendorName, ArchName); 4790 return false; 4791 } 4792 4793 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4794 amd_kernel_code_t &Header) { 4795 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4796 // assembly for backwards compatibility. 4797 if (ID == "max_scratch_backing_memory_byte_size") { 4798 Parser.eatToEndOfStatement(); 4799 return false; 4800 } 4801 4802 SmallString<40> ErrStr; 4803 raw_svector_ostream Err(ErrStr); 4804 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4805 return TokError(Err.str()); 4806 } 4807 Lex(); 4808 4809 if (ID == "enable_wavefront_size32") { 4810 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4811 if (!isGFX10Plus()) 4812 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4813 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4814 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4815 } else { 4816 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4817 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4818 } 4819 } 4820 4821 if (ID == "wavefront_size") { 4822 if (Header.wavefront_size == 5) { 4823 if (!isGFX10Plus()) 4824 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4825 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4826 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4827 } else if (Header.wavefront_size == 6) { 4828 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4829 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4830 } 4831 } 4832 4833 if (ID == "enable_wgp_mode") { 4834 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4835 !isGFX10Plus()) 4836 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4837 } 4838 4839 if (ID == "enable_mem_ordered") { 4840 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4841 !isGFX10Plus()) 4842 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4843 } 4844 4845 if (ID == "enable_fwd_progress") { 4846 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4847 !isGFX10Plus()) 4848 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4849 } 4850 4851 return false; 4852 } 4853 4854 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4855 amd_kernel_code_t Header; 4856 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4857 4858 while (true) { 4859 // Lex 
EndOfStatement. This is in a while loop, because lexing a comment 4860 // will set the current token to EndOfStatement. 4861 while(trySkipToken(AsmToken::EndOfStatement)); 4862 4863 StringRef ID; 4864 if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t")) 4865 return true; 4866 4867 if (ID == ".end_amd_kernel_code_t") 4868 break; 4869 4870 if (ParseAMDKernelCodeTValue(ID, Header)) 4871 return true; 4872 } 4873 4874 getTargetStreamer().EmitAMDKernelCodeT(Header); 4875 4876 return false; 4877 } 4878 4879 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4880 StringRef KernelName; 4881 if (!parseId(KernelName, "expected symbol name")) 4882 return true; 4883 4884 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4885 ELF::STT_AMDGPU_HSA_KERNEL); 4886 4887 KernelScope.initialize(getContext()); 4888 return false; 4889 } 4890 4891 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4892 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4893 return Error(getLoc(), 4894 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4895 "architectures"); 4896 } 4897 4898 auto ISAVersionStringFromASM = getToken().getStringContents(); 4899 4900 std::string ISAVersionStringFromSTI; 4901 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4902 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4903 4904 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4905 return Error(getLoc(), 4906 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4907 "arguments specified through the command line"); 4908 } 4909 4910 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4911 Lex(); 4912 4913 return false; 4914 } 4915 4916 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4917 const char *AssemblerDirectiveBegin; 4918 const char *AssemblerDirectiveEnd; 4919 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4920 isHsaAbiVersion3(&getSTI()) 4921 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4922 HSAMD::V3::AssemblerDirectiveEnd) 4923 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4924 HSAMD::AssemblerDirectiveEnd); 4925 4926 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4927 return Error(getLoc(), 4928 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4929 "not available on non-amdhsa OSes")).str()); 4930 } 4931 4932 std::string HSAMetadataString; 4933 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4934 HSAMetadataString)) 4935 return true; 4936 4937 if (isHsaAbiVersion3(&getSTI())) { 4938 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4939 return Error(getLoc(), "invalid HSA metadata"); 4940 } else { 4941 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4942 return Error(getLoc(), "invalid HSA metadata"); 4943 } 4944 4945 return false; 4946 } 4947 4948 /// Common code to parse out a block of text (typically YAML) between start and 4949 /// end directives. 
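/// The collected text is returned as a single separator-joined string; for
/// example, on the code-object-v3 path the HSA metadata body between
/// ".amdgpu_metadata" and ".end_amdgpu_metadata" is gathered here verbatim
/// and handed to the target streamer unparsed (illustrative directive names).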
4950 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4951 const char *AssemblerDirectiveEnd, 4952 std::string &CollectString) { 4953 4954 raw_string_ostream CollectStream(CollectString); 4955 4956 getLexer().setSkipSpace(false); 4957 4958 bool FoundEnd = false; 4959 while (!isToken(AsmToken::Eof)) { 4960 while (isToken(AsmToken::Space)) { 4961 CollectStream << getTokenStr(); 4962 Lex(); 4963 } 4964 4965 if (trySkipId(AssemblerDirectiveEnd)) { 4966 FoundEnd = true; 4967 break; 4968 } 4969 4970 CollectStream << Parser.parseStringToEndOfStatement() 4971 << getContext().getAsmInfo()->getSeparatorString(); 4972 4973 Parser.eatToEndOfStatement(); 4974 } 4975 4976 getLexer().setSkipSpace(true); 4977 4978 if (isToken(AsmToken::Eof) && !FoundEnd) { 4979 return TokError(Twine("expected directive ") + 4980 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4981 } 4982 4983 CollectStream.flush(); 4984 return false; 4985 } 4986 4987 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4988 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4989 std::string String; 4990 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4991 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4992 return true; 4993 4994 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4995 if (!PALMetadata->setFromString(String)) 4996 return Error(getLoc(), "invalid PAL metadata"); 4997 return false; 4998 } 4999 5000 /// Parse the assembler directive for old linear-format PAL metadata. 5001 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 5002 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 5003 return Error(getLoc(), 5004 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 5005 "not available on non-amdpal OSes")).str()); 5006 } 5007 5008 auto PALMetadata = getTargetStreamer().getPALMetadata(); 5009 PALMetadata->setLegacy(); 5010 for (;;) { 5011 uint32_t Key, Value; 5012 if (ParseAsAbsoluteExpression(Key)) { 5013 return TokError(Twine("invalid value in ") + 5014 Twine(PALMD::AssemblerDirective)); 5015 } 5016 if (!trySkipToken(AsmToken::Comma)) { 5017 return TokError(Twine("expected an even number of values in ") + 5018 Twine(PALMD::AssemblerDirective)); 5019 } 5020 if (ParseAsAbsoluteExpression(Value)) { 5021 return TokError(Twine("invalid value in ") + 5022 Twine(PALMD::AssemblerDirective)); 5023 } 5024 PALMetadata->setRegister(Key, Value); 5025 if (!trySkipToken(AsmToken::Comma)) 5026 break; 5027 } 5028 return false; 5029 } 5030 5031 /// ParseDirectiveAMDGPULDS 5032 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 5033 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 5034 if (getParser().checkForValidSection()) 5035 return true; 5036 5037 StringRef Name; 5038 SMLoc NameLoc = getLoc(); 5039 if (getParser().parseIdentifier(Name)) 5040 return TokError("expected identifier in directive"); 5041 5042 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 5043 if (parseToken(AsmToken::Comma, "expected ','")) 5044 return true; 5045 5046 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 5047 5048 int64_t Size; 5049 SMLoc SizeLoc = getLoc(); 5050 if (getParser().parseAbsoluteExpression(Size)) 5051 return true; 5052 if (Size < 0) 5053 return Error(SizeLoc, "size must be non-negative"); 5054 if (Size > LocalMemorySize) 5055 return Error(SizeLoc, "size is too large"); 5056 5057 int64_t Alignment = 4; 5058 if (trySkipToken(AsmToken::Comma)) { 5059 SMLoc AlignLoc = getLoc(); 5060 if 
(getParser().parseAbsoluteExpression(Alignment)) 5061 return true; 5062 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 5063 return Error(AlignLoc, "alignment must be a power of two"); 5064 5065 // Alignment larger than the size of LDS is possible in theory, as long 5066 // as the linker manages to place to symbol at address 0, but we do want 5067 // to make sure the alignment fits nicely into a 32-bit integer. 5068 if (Alignment >= 1u << 31) 5069 return Error(AlignLoc, "alignment is too large"); 5070 } 5071 5072 if (parseToken(AsmToken::EndOfStatement, 5073 "unexpected token in '.amdgpu_lds' directive")) 5074 return true; 5075 5076 Symbol->redefineIfPossible(); 5077 if (!Symbol->isUndefined()) 5078 return Error(NameLoc, "invalid symbol redefinition"); 5079 5080 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 5081 return false; 5082 } 5083 5084 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 5085 StringRef IDVal = DirectiveID.getString(); 5086 5087 if (isHsaAbiVersion3(&getSTI())) { 5088 if (IDVal == ".amdgcn_target") 5089 return ParseDirectiveAMDGCNTarget(); 5090 5091 if (IDVal == ".amdhsa_kernel") 5092 return ParseDirectiveAMDHSAKernel(); 5093 5094 // TODO: Restructure/combine with PAL metadata directive. 5095 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 5096 return ParseDirectiveHSAMetadata(); 5097 } else { 5098 if (IDVal == ".hsa_code_object_version") 5099 return ParseDirectiveHSACodeObjectVersion(); 5100 5101 if (IDVal == ".hsa_code_object_isa") 5102 return ParseDirectiveHSACodeObjectISA(); 5103 5104 if (IDVal == ".amd_kernel_code_t") 5105 return ParseDirectiveAMDKernelCodeT(); 5106 5107 if (IDVal == ".amdgpu_hsa_kernel") 5108 return ParseDirectiveAMDGPUHsaKernel(); 5109 5110 if (IDVal == ".amd_amdgpu_isa") 5111 return ParseDirectiveISAVersion(); 5112 5113 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 5114 return ParseDirectiveHSAMetadata(); 5115 } 5116 5117 if (IDVal == ".amdgpu_lds") 5118 return ParseDirectiveAMDGPULDS(); 5119 5120 if (IDVal == PALMD::AssemblerDirectiveBegin) 5121 return ParseDirectivePALMetadataBegin(); 5122 5123 if (IDVal == PALMD::AssemblerDirective) 5124 return ParseDirectivePALMetadata(); 5125 5126 return true; 5127 } 5128 5129 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 5130 unsigned RegNo) const { 5131 5132 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 5133 R.isValid(); ++R) { 5134 if (*R == RegNo) 5135 return isGFX9Plus(); 5136 } 5137 5138 // GFX10 has 2 more SGPRs 104 and 105. 5139 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 5140 R.isValid(); ++R) { 5141 if (*R == RegNo) 5142 return hasSGPR104_SGPR105(); 5143 } 5144 5145 switch (RegNo) { 5146 case AMDGPU::SRC_SHARED_BASE: 5147 case AMDGPU::SRC_SHARED_LIMIT: 5148 case AMDGPU::SRC_PRIVATE_BASE: 5149 case AMDGPU::SRC_PRIVATE_LIMIT: 5150 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 5151 return isGFX9Plus(); 5152 case AMDGPU::TBA: 5153 case AMDGPU::TBA_LO: 5154 case AMDGPU::TBA_HI: 5155 case AMDGPU::TMA: 5156 case AMDGPU::TMA_LO: 5157 case AMDGPU::TMA_HI: 5158 return !isGFX9Plus(); 5159 case AMDGPU::XNACK_MASK: 5160 case AMDGPU::XNACK_MASK_LO: 5161 case AMDGPU::XNACK_MASK_HI: 5162 return (isVI() || isGFX9()) && hasXNACK(); 5163 case AMDGPU::SGPR_NULL: 5164 return isGFX10Plus(); 5165 default: 5166 break; 5167 } 5168 5169 if (isCI()) 5170 return true; 5171 5172 if (isSI() || isGFX10Plus()) { 5173 // No flat_scr on SI. 
5174 // On GFX10 flat scratch is not a valid register operand and can only be 5175 // accessed with s_setreg/s_getreg. 5176 switch (RegNo) { 5177 case AMDGPU::FLAT_SCR: 5178 case AMDGPU::FLAT_SCR_LO: 5179 case AMDGPU::FLAT_SCR_HI: 5180 return false; 5181 default: 5182 return true; 5183 } 5184 } 5185 5186 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 5187 // SI/CI have. 5188 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 5189 R.isValid(); ++R) { 5190 if (*R == RegNo) 5191 return hasSGPR102_SGPR103(); 5192 } 5193 5194 return true; 5195 } 5196 5197 OperandMatchResultTy 5198 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 5199 OperandMode Mode) { 5200 // Try to parse with a custom parser 5201 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 5202 5203 // If we successfully parsed the operand or if there as an error parsing, 5204 // we are done. 5205 // 5206 // If we are parsing after we reach EndOfStatement then this means we 5207 // are appending default values to the Operands list. This is only done 5208 // by custom parser, so we shouldn't continue on to the generic parsing. 5209 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 5210 isToken(AsmToken::EndOfStatement)) 5211 return ResTy; 5212 5213 SMLoc RBraceLoc; 5214 SMLoc LBraceLoc = getLoc(); 5215 if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) { 5216 unsigned Prefix = Operands.size(); 5217 5218 for (;;) { 5219 auto Loc = getLoc(); 5220 ResTy = parseReg(Operands); 5221 if (ResTy == MatchOperand_NoMatch) 5222 Error(Loc, "expected a register"); 5223 if (ResTy != MatchOperand_Success) 5224 return MatchOperand_ParseFail; 5225 5226 RBraceLoc = getLoc(); 5227 if (trySkipToken(AsmToken::RBrac)) 5228 break; 5229 5230 if (!skipToken(AsmToken::Comma, 5231 "expected a comma or a closing square bracket")) { 5232 return MatchOperand_ParseFail; 5233 } 5234 } 5235 5236 if (Operands.size() - Prefix > 1) { 5237 Operands.insert(Operands.begin() + Prefix, 5238 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 5239 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc)); 5240 } 5241 5242 return MatchOperand_Success; 5243 } 5244 5245 return parseRegOrImm(Operands); 5246 } 5247 5248 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 5249 // Clear any forced encodings from the previous instruction. 
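  // A trailing suffix forces a particular encoding, e.g. (illustrative):
  //   "v_add_f32_e64"  -> mnemonic "v_add_f32", 64-bit (VOP3) encoding forced
  //   "v_mov_b32_sdwa" -> mnemonic "v_mov_b32", SDWA encoding forced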
5250 setForcedEncodingSize(0); 5251 setForcedDPP(false); 5252 setForcedSDWA(false); 5253 5254 if (Name.endswith("_e64")) { 5255 setForcedEncodingSize(64); 5256 return Name.substr(0, Name.size() - 4); 5257 } else if (Name.endswith("_e32")) { 5258 setForcedEncodingSize(32); 5259 return Name.substr(0, Name.size() - 4); 5260 } else if (Name.endswith("_dpp")) { 5261 setForcedDPP(true); 5262 return Name.substr(0, Name.size() - 4); 5263 } else if (Name.endswith("_sdwa")) { 5264 setForcedSDWA(true); 5265 return Name.substr(0, Name.size() - 5); 5266 } 5267 return Name; 5268 } 5269 5270 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 5271 StringRef Name, 5272 SMLoc NameLoc, OperandVector &Operands) { 5273 // Add the instruction mnemonic 5274 Name = parseMnemonicSuffix(Name); 5275 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 5276 5277 bool IsMIMG = Name.startswith("image_"); 5278 5279 while (!trySkipToken(AsmToken::EndOfStatement)) { 5280 OperandMode Mode = OperandMode_Default; 5281 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5282 Mode = OperandMode_NSA; 5283 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5284 5285 if (Res != MatchOperand_Success) { 5286 checkUnsupportedInstruction(Name, NameLoc); 5287 if (!Parser.hasPendingError()) { 5288 // FIXME: use real operand location rather than the current location. 5289 StringRef Msg = 5290 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5291 "not a valid operand."; 5292 Error(getLoc(), Msg); 5293 } 5294 while (!trySkipToken(AsmToken::EndOfStatement)) { 5295 lex(); 5296 } 5297 return true; 5298 } 5299 5300 // Eat the comma or space if there is one. 5301 trySkipToken(AsmToken::Comma); 5302 } 5303 5304 return false; 5305 } 5306 5307 //===----------------------------------------------------------------------===// 5308 // Utility functions 5309 //===----------------------------------------------------------------------===// 5310 5311 OperandMatchResultTy 5312 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5313 5314 if (!trySkipId(Prefix, AsmToken::Colon)) 5315 return MatchOperand_NoMatch; 5316 5317 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5318 } 5319 5320 OperandMatchResultTy 5321 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5322 AMDGPUOperand::ImmTy ImmTy, 5323 bool (*ConvertResult)(int64_t&)) { 5324 SMLoc S = getLoc(); 5325 int64_t Value = 0; 5326 5327 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5328 if (Res != MatchOperand_Success) 5329 return Res; 5330 5331 if (ConvertResult && !ConvertResult(Value)) { 5332 Error(S, "invalid " + StringRef(Prefix) + " value."); 5333 } 5334 5335 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5336 return MatchOperand_Success; 5337 } 5338 5339 OperandMatchResultTy 5340 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5341 OperandVector &Operands, 5342 AMDGPUOperand::ImmTy ImmTy, 5343 bool (*ConvertResult)(int64_t&)) { 5344 SMLoc S = getLoc(); 5345 if (!trySkipId(Prefix, AsmToken::Colon)) 5346 return MatchOperand_NoMatch; 5347 5348 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5349 return MatchOperand_ParseFail; 5350 5351 unsigned Val = 0; 5352 const unsigned MaxSize = 4; 5353 5354 // FIXME: How to verify the number of elements matches the number of src 5355 // operands? 
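  // Each element must be 0 or 1 and is packed into bit I of the result, so
  // e.g. "op_sel:[1,0,1]" yields Val == 0b101 (illustrative prefix; the same
  // parser also serves op_sel_hi, neg_lo and neg_hi).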
5356 for (int I = 0; ; ++I) { 5357 int64_t Op; 5358 SMLoc Loc = getLoc(); 5359 if (!parseExpr(Op)) 5360 return MatchOperand_ParseFail; 5361 5362 if (Op != 0 && Op != 1) { 5363 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5364 return MatchOperand_ParseFail; 5365 } 5366 5367 Val |= (Op << I); 5368 5369 if (trySkipToken(AsmToken::RBrac)) 5370 break; 5371 5372 if (I + 1 == MaxSize) { 5373 Error(getLoc(), "expected a closing square bracket"); 5374 return MatchOperand_ParseFail; 5375 } 5376 5377 if (!skipToken(AsmToken::Comma, "expected a comma")) 5378 return MatchOperand_ParseFail; 5379 } 5380 5381 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5382 return MatchOperand_Success; 5383 } 5384 5385 OperandMatchResultTy 5386 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands, 5387 AMDGPUOperand::ImmTy ImmTy) { 5388 int64_t Bit; 5389 SMLoc S = getLoc(); 5390 5391 if (trySkipId(Name)) { 5392 Bit = 1; 5393 } else if (trySkipId("no", Name)) { 5394 Bit = 0; 5395 } else { 5396 return MatchOperand_NoMatch; 5397 } 5398 5399 if (Name == "r128" && !hasMIMG_R128()) { 5400 Error(S, "r128 modifier is not supported on this GPU"); 5401 return MatchOperand_ParseFail; 5402 } 5403 if (Name == "a16" && !isGFX9() && !hasGFX10A16()) { 5404 Error(S, "a16 modifier is not supported on this GPU"); 5405 return MatchOperand_ParseFail; 5406 } 5407 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) { 5408 Error(S, "dlc modifier is not supported on this GPU"); 5409 return MatchOperand_ParseFail; 5410 } 5411 if (!isGFX90A() && ImmTy == AMDGPUOperand::ImmTySCCB) 5412 return MatchOperand_ParseFail; 5413 5414 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5415 ImmTy = AMDGPUOperand::ImmTyR128A16; 5416 5417 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5418 return MatchOperand_Success; 5419 } 5420 5421 static void addOptionalImmOperand( 5422 MCInst& Inst, const OperandVector& Operands, 5423 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5424 AMDGPUOperand::ImmTy ImmT, 5425 int64_t Default = 0) { 5426 auto i = OptionalIdx.find(ImmT); 5427 if (i != OptionalIdx.end()) { 5428 unsigned Idx = i->second; 5429 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5430 } else { 5431 Inst.addOperand(MCOperand::createImm(Default)); 5432 } 5433 } 5434 5435 OperandMatchResultTy 5436 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, 5437 StringRef &Value, 5438 SMLoc &StringLoc) { 5439 if (!trySkipId(Prefix, AsmToken::Colon)) 5440 return MatchOperand_NoMatch; 5441 5442 StringLoc = getLoc(); 5443 return parseId(Value, "expected an identifier") ? MatchOperand_Success 5444 : MatchOperand_ParseFail; 5445 } 5446 5447 //===----------------------------------------------------------------------===// 5448 // MTBUF format 5449 //===----------------------------------------------------------------------===// 5450 5451 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5452 int64_t MaxVal, 5453 int64_t &Fmt) { 5454 int64_t Val; 5455 SMLoc Loc = getLoc(); 5456 5457 auto Res = parseIntWithPrefix(Pref, Val); 5458 if (Res == MatchOperand_ParseFail) 5459 return false; 5460 if (Res == MatchOperand_NoMatch) 5461 return true; 5462 5463 if (Val < 0 || Val > MaxVal) { 5464 Error(Loc, Twine("out of range ", StringRef(Pref))); 5465 return false; 5466 } 5467 5468 Fmt = Val; 5469 return true; 5470 } 5471 5472 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5473 // values to live in a joint format operand in the MCInst encoding. 
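// For example, "dfmt:1, nfmt:2" and "nfmt:2 dfmt:1" should both be accepted
// here: the two specifiers may appear in either order and the separating
// comma is optional.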
5474 OperandMatchResultTy 5475 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5476 using namespace llvm::AMDGPU::MTBUFFormat; 5477 5478 int64_t Dfmt = DFMT_UNDEF; 5479 int64_t Nfmt = NFMT_UNDEF; 5480 5481 // dfmt and nfmt can appear in either order, and each is optional. 5482 for (int I = 0; I < 2; ++I) { 5483 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5484 return MatchOperand_ParseFail; 5485 5486 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5487 return MatchOperand_ParseFail; 5488 } 5489 // Skip optional comma between dfmt/nfmt 5490 // but guard against 2 commas following each other. 5491 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5492 !peekToken().is(AsmToken::Comma)) { 5493 trySkipToken(AsmToken::Comma); 5494 } 5495 } 5496 5497 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5498 return MatchOperand_NoMatch; 5499 5500 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5501 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5502 5503 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5504 return MatchOperand_Success; 5505 } 5506 5507 OperandMatchResultTy 5508 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5509 using namespace llvm::AMDGPU::MTBUFFormat; 5510 5511 int64_t Fmt = UFMT_UNDEF; 5512 5513 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5514 return MatchOperand_ParseFail; 5515 5516 if (Fmt == UFMT_UNDEF) 5517 return MatchOperand_NoMatch; 5518 5519 Format = Fmt; 5520 return MatchOperand_Success; 5521 } 5522 5523 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5524 int64_t &Nfmt, 5525 StringRef FormatStr, 5526 SMLoc Loc) { 5527 using namespace llvm::AMDGPU::MTBUFFormat; 5528 int64_t Format; 5529 5530 Format = getDfmt(FormatStr); 5531 if (Format != DFMT_UNDEF) { 5532 Dfmt = Format; 5533 return true; 5534 } 5535 5536 Format = getNfmt(FormatStr, getSTI()); 5537 if (Format != NFMT_UNDEF) { 5538 Nfmt = Format; 5539 return true; 5540 } 5541 5542 Error(Loc, "unsupported format"); 5543 return false; 5544 } 5545 5546 OperandMatchResultTy 5547 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5548 SMLoc FormatLoc, 5549 int64_t &Format) { 5550 using namespace llvm::AMDGPU::MTBUFFormat; 5551 5552 int64_t Dfmt = DFMT_UNDEF; 5553 int64_t Nfmt = NFMT_UNDEF; 5554 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5555 return MatchOperand_ParseFail; 5556 5557 if (trySkipToken(AsmToken::Comma)) { 5558 StringRef Str; 5559 SMLoc Loc = getLoc(); 5560 if (!parseId(Str, "expected a format string") || 5561 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5562 return MatchOperand_ParseFail; 5563 } 5564 if (Dfmt == DFMT_UNDEF) { 5565 Error(Loc, "duplicate numeric format"); 5566 return MatchOperand_ParseFail; 5567 } else if (Nfmt == NFMT_UNDEF) { 5568 Error(Loc, "duplicate data format"); 5569 return MatchOperand_ParseFail; 5570 } 5571 } 5572 5573 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5574 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5575 5576 if (isGFX10Plus()) { 5577 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5578 if (Ufmt == UFMT_UNDEF) { 5579 Error(FormatLoc, "unsupported format"); 5580 return MatchOperand_ParseFail; 5581 } 5582 Format = Ufmt; 5583 } else { 5584 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5585 } 5586 5587 return MatchOperand_Success; 5588 } 5589 5590 OperandMatchResultTy 5591 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5592 SMLoc Loc, 5593 int64_t &Format) { 5594 using namespace llvm::AMDGPU::MTBUFFormat; 5595 5596 auto Id = getUnifiedFormat(FormatStr); 5597 if (Id == UFMT_UNDEF) 5598 return MatchOperand_NoMatch; 5599 5600 if (!isGFX10Plus()) { 5601 Error(Loc, "unified format is not supported on this GPU"); 5602 return MatchOperand_ParseFail; 5603 } 5604 5605 Format = Id; 5606 return MatchOperand_Success; 5607 } 5608 5609 OperandMatchResultTy 5610 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5611 using namespace llvm::AMDGPU::MTBUFFormat; 5612 SMLoc Loc = getLoc(); 5613 5614 if (!parseExpr(Format)) 5615 return MatchOperand_ParseFail; 5616 if (!isValidFormatEncoding(Format, getSTI())) { 5617 Error(Loc, "out of range format"); 5618 return MatchOperand_ParseFail; 5619 } 5620 5621 return MatchOperand_Success; 5622 } 5623 5624 OperandMatchResultTy 5625 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5626 using namespace llvm::AMDGPU::MTBUFFormat; 5627 5628 if (!trySkipId("format", AsmToken::Colon)) 5629 return MatchOperand_NoMatch; 5630 5631 if (trySkipToken(AsmToken::LBrac)) { 5632 StringRef FormatStr; 5633 SMLoc Loc = getLoc(); 5634 if (!parseId(FormatStr, "expected a format string")) 5635 return MatchOperand_ParseFail; 5636 5637 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5638 if (Res == MatchOperand_NoMatch) 5639 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5640 if (Res != MatchOperand_Success) 5641 return Res; 5642 5643 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5644 return MatchOperand_ParseFail; 5645 5646 return MatchOperand_Success; 5647 } 5648 5649 return parseNumericFormat(Format); 5650 } 5651 5652 OperandMatchResultTy 5653 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5654 using namespace llvm::AMDGPU::MTBUFFormat; 5655 5656 int64_t Format = getDefaultFormatEncoding(getSTI()); 5657 OperandMatchResultTy Res; 5658 SMLoc Loc = getLoc(); 5659 5660 // Parse legacy format syntax. 5661 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5662 if (Res == MatchOperand_ParseFail) 5663 return Res; 5664 5665 bool FormatFound = (Res == MatchOperand_Success); 5666 5667 Operands.push_back( 5668 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5669 5670 if (FormatFound) 5671 trySkipToken(AsmToken::Comma); 5672 5673 if (isToken(AsmToken::EndOfStatement)) { 5674 // We are expecting an soffset operand, 5675 // but let matcher handle the error. 5676 return MatchOperand_Success; 5677 } 5678 5679 // Parse soffset. 
5680 Res = parseRegOrImm(Operands); 5681 if (Res != MatchOperand_Success) 5682 return Res; 5683 5684 trySkipToken(AsmToken::Comma); 5685 5686 if (!FormatFound) { 5687 Res = parseSymbolicOrNumericFormat(Format); 5688 if (Res == MatchOperand_ParseFail) 5689 return Res; 5690 if (Res == MatchOperand_Success) { 5691 auto Size = Operands.size(); 5692 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5693 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5694 Op.setImm(Format); 5695 } 5696 return MatchOperand_Success; 5697 } 5698 5699 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5700 Error(getLoc(), "duplicate format"); 5701 return MatchOperand_ParseFail; 5702 } 5703 return MatchOperand_Success; 5704 } 5705 5706 //===----------------------------------------------------------------------===// 5707 // ds 5708 //===----------------------------------------------------------------------===// 5709 5710 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5711 const OperandVector &Operands) { 5712 OptionalImmIndexMap OptionalIdx; 5713 5714 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5715 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5716 5717 // Add the register arguments 5718 if (Op.isReg()) { 5719 Op.addRegOperands(Inst, 1); 5720 continue; 5721 } 5722 5723 // Handle optional arguments 5724 OptionalIdx[Op.getImmTy()] = i; 5725 } 5726 5727 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5728 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5730 5731 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5732 } 5733 5734 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5735 bool IsGdsHardcoded) { 5736 OptionalImmIndexMap OptionalIdx; 5737 5738 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5739 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5740 5741 // Add the register arguments 5742 if (Op.isReg()) { 5743 Op.addRegOperands(Inst, 1); 5744 continue; 5745 } 5746 5747 if (Op.isToken() && Op.getToken() == "gds") { 5748 IsGdsHardcoded = true; 5749 continue; 5750 } 5751 5752 // Handle optional arguments 5753 OptionalIdx[Op.getImmTy()] = i; 5754 } 5755 5756 AMDGPUOperand::ImmTy OffsetType = 5757 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5758 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5759 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5760 AMDGPUOperand::ImmTyOffset; 5761 5762 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5763 5764 if (!IsGdsHardcoded) { 5765 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5766 } 5767 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5768 } 5769 5770 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5771 OptionalImmIndexMap OptionalIdx; 5772 5773 unsigned OperandIdx[4]; 5774 unsigned EnMask = 0; 5775 int SrcIdx = 0; 5776 5777 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5778 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5779 5780 // Add the register arguments 5781 if (Op.isReg()) { 5782 assert(SrcIdx < 4); 5783 OperandIdx[SrcIdx] = Inst.size(); 5784 Op.addRegOperands(Inst, 1); 5785 ++SrcIdx; 5786 continue; 5787 } 5788 5789 if (Op.isOff()) { 5790 assert(SrcIdx < 4); 5791 OperandIdx[SrcIdx] = Inst.size(); 5792 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5793 ++SrcIdx; 5794 continue; 5795 } 5796 5797 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5798 Op.addImmOperands(Inst, 1); 5799 continue; 5800 } 5801 5802 if (Op.isToken() && Op.getToken() == "done") 5803 continue; 5804 5805 // Handle optional arguments 5806 OptionalIdx[Op.getImmTy()] = i; 5807 } 5808 5809 assert(SrcIdx == 4); 5810 5811 bool Compr = false; 5812 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5813 Compr = true; 5814 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5815 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5816 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5817 } 5818 5819 for (auto i = 0; i < SrcIdx; ++i) { 5820 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5821 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5822 } 5823 } 5824 5825 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5826 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5827 5828 Inst.addOperand(MCOperand::createImm(EnMask)); 5829 } 5830 5831 //===----------------------------------------------------------------------===// 5832 // s_waitcnt 5833 //===----------------------------------------------------------------------===// 5834 5835 static bool 5836 encodeCnt( 5837 const AMDGPU::IsaVersion ISA, 5838 int64_t &IntVal, 5839 int64_t CntVal, 5840 bool Saturate, 5841 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5842 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5843 { 5844 bool Failed = false; 5845 5846 IntVal = encode(ISA, IntVal, CntVal); 5847 if (CntVal != decode(ISA, IntVal)) { 5848 if (Saturate) { 5849 IntVal = encode(ISA, IntVal, -1); 5850 } else { 5851 Failed = true; 5852 } 5853 } 5854 return Failed; 5855 } 5856 5857 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5858 5859 SMLoc CntLoc = getLoc(); 5860 StringRef CntName = getTokenStr(); 5861 5862 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5863 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5864 return false; 5865 5866 int64_t CntVal; 5867 SMLoc ValLoc = getLoc(); 5868 if (!parseExpr(CntVal)) 5869 return false; 5870 5871 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5872 5873 bool Failed = true; 5874 bool Sat = CntName.endswith("_sat"); 5875 5876 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5877 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5878 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5879 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5880 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5881 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5882 } else { 5883 Error(CntLoc, "invalid counter name " + CntName); 5884 return false; 5885 } 5886 5887 if (Failed) { 5888 Error(ValLoc, "too large value for " + CntName); 5889 return false; 5890 } 5891 5892 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5893 return false; 5894 5895 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5896 if (isToken(AsmToken::EndOfStatement)) { 5897 Error(getLoc(), "expected a counter name"); 5898 return false; 5899 } 5900 } 5901 5902 return true; 5903 } 5904 5905 OperandMatchResultTy 5906 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5907 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5908 int64_t Waitcnt = getWaitcntBitMask(ISA); 5909 SMLoc S = getLoc(); 5910 5911 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5912 while (!isToken(AsmToken::EndOfStatement)) { 5913 if (!parseCnt(Waitcnt)) 5914 return MatchOperand_ParseFail; 5915 } 5916 } else { 5917 if (!parseExpr(Waitcnt)) 5918 return MatchOperand_ParseFail; 5919 } 5920 5921 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5922 return MatchOperand_Success; 5923 } 5924 5925 bool 5926 AMDGPUOperand::isSWaitCnt() const { 5927 return isImm(); 5928 } 5929 5930 //===----------------------------------------------------------------------===// 5931 // hwreg 5932 //===----------------------------------------------------------------------===// 5933 5934 bool 5935 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5936 OperandInfoTy &Offset, 5937 
OperandInfoTy &Width) { 5938 using namespace llvm::AMDGPU::Hwreg; 5939 5940 // The register may be specified by name or using a numeric code 5941 HwReg.Loc = getLoc(); 5942 if (isToken(AsmToken::Identifier) && 5943 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5944 HwReg.IsSymbolic = true; 5945 lex(); // skip register name 5946 } else if (!parseExpr(HwReg.Id, "a register name")) { 5947 return false; 5948 } 5949 5950 if (trySkipToken(AsmToken::RParen)) 5951 return true; 5952 5953 // parse optional params 5954 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5955 return false; 5956 5957 Offset.Loc = getLoc(); 5958 if (!parseExpr(Offset.Id)) 5959 return false; 5960 5961 if (!skipToken(AsmToken::Comma, "expected a comma")) 5962 return false; 5963 5964 Width.Loc = getLoc(); 5965 return parseExpr(Width.Id) && 5966 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5967 } 5968 5969 bool 5970 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5971 const OperandInfoTy &Offset, 5972 const OperandInfoTy &Width) { 5973 5974 using namespace llvm::AMDGPU::Hwreg; 5975 5976 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5977 Error(HwReg.Loc, 5978 "specified hardware register is not supported on this GPU"); 5979 return false; 5980 } 5981 if (!isValidHwreg(HwReg.Id)) { 5982 Error(HwReg.Loc, 5983 "invalid code of hardware register: only 6-bit values are legal"); 5984 return false; 5985 } 5986 if (!isValidHwregOffset(Offset.Id)) { 5987 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5988 return false; 5989 } 5990 if (!isValidHwregWidth(Width.Id)) { 5991 Error(Width.Loc, 5992 "invalid bitfield width: only values from 1 to 32 are legal"); 5993 return false; 5994 } 5995 return true; 5996 } 5997 5998 OperandMatchResultTy 5999 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 6000 using namespace llvm::AMDGPU::Hwreg; 6001 6002 int64_t ImmVal = 0; 6003 SMLoc Loc = getLoc(); 6004 6005 if (trySkipId("hwreg", AsmToken::LParen)) { 6006 OperandInfoTy HwReg(ID_UNKNOWN_); 6007 OperandInfoTy Offset(OFFSET_DEFAULT_); 6008 OperandInfoTy Width(WIDTH_DEFAULT_); 6009 if (parseHwregBody(HwReg, Offset, Width) && 6010 validateHwreg(HwReg, Offset, Width)) { 6011 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 6012 } else { 6013 return MatchOperand_ParseFail; 6014 } 6015 } else if (parseExpr(ImmVal, "a hwreg macro")) { 6016 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 6017 Error(Loc, "invalid immediate: only 16-bit values are legal"); 6018 return MatchOperand_ParseFail; 6019 } 6020 } else { 6021 return MatchOperand_ParseFail; 6022 } 6023 6024 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 6025 return MatchOperand_Success; 6026 } 6027 6028 bool AMDGPUOperand::isHwreg() const { 6029 return isImmTy(ImmTyHwreg); 6030 } 6031 6032 //===----------------------------------------------------------------------===// 6033 // sendmsg 6034 //===----------------------------------------------------------------------===// 6035 6036 bool 6037 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 6038 OperandInfoTy &Op, 6039 OperandInfoTy &Stream) { 6040 using namespace llvm::AMDGPU::SendMsg; 6041 6042 Msg.Loc = getLoc(); 6043 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 6044 Msg.IsSymbolic = true; 6045 lex(); // skip message name 6046 } else if (!parseExpr(Msg.Id, "a message name")) { 6047 return false; 6048 } 6049 6050 if (trySkipToken(AsmToken::Comma)) { 6051 Op.IsDefined = true; 
6052 Op.Loc = getLoc();
6053 if (isToken(AsmToken::Identifier) &&
6054 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
6055 lex(); // skip operation name
6056 } else if (!parseExpr(Op.Id, "an operation name")) {
6057 return false;
6058 }
6059
6060 if (trySkipToken(AsmToken::Comma)) {
6061 Stream.IsDefined = true;
6062 Stream.Loc = getLoc();
6063 if (!parseExpr(Stream.Id))
6064 return false;
6065 }
6066 }
6067
6068 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
6069 }
6070
6071 bool
6072 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
6073 const OperandInfoTy &Op,
6074 const OperandInfoTy &Stream) {
6075 using namespace llvm::AMDGPU::SendMsg;
6076
6077 // Validation strictness depends on whether the message is specified
6078 // in a symbolic or in a numeric form. In the latter case
6079 // only the possibility of encoding is checked.
6080 bool Strict = Msg.IsSymbolic;
6081
6082 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
6083 Error(Msg.Loc, "invalid message id");
6084 return false;
6085 }
6086 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
6087 if (Op.IsDefined) {
6088 Error(Op.Loc, "message does not support operations");
6089 } else {
6090 Error(Msg.Loc, "missing message operation");
6091 }
6092 return false;
6093 }
6094 if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
6095 Error(Op.Loc, "invalid operation id");
6096 return false;
6097 }
6098 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
6099 Error(Stream.Loc, "message operation does not support streams");
6100 return false;
6101 }
6102 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
6103 Error(Stream.Loc, "invalid message stream id");
6104 return false;
6105 }
6106 return true;
6107 }
6108
6109 OperandMatchResultTy
6110 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
6111 using namespace llvm::AMDGPU::SendMsg;
6112
6113 int64_t ImmVal = 0;
6114 SMLoc Loc = getLoc();
6115
6116 if (trySkipId("sendmsg", AsmToken::LParen)) {
6117 OperandInfoTy Msg(ID_UNKNOWN_);
6118 OperandInfoTy Op(OP_NONE_);
6119 OperandInfoTy Stream(STREAM_ID_NONE_);
6120 if (parseSendMsgBody(Msg, Op, Stream) &&
6121 validateSendMsg(Msg, Op, Stream)) {
6122 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
6123 } else {
6124 return MatchOperand_ParseFail;
6125 }
6126 } else if (parseExpr(ImmVal, "a sendmsg macro")) {
6127 if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
6128 Error(Loc, "invalid immediate: only 16-bit values are legal");
6129 return MatchOperand_ParseFail;
6130 }
6131 } else {
6132 return MatchOperand_ParseFail;
6133 }
6134
6135 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
6136 return MatchOperand_Success;
6137 }
6138
6139 bool AMDGPUOperand::isSendMsg() const {
6140 return isImmTy(ImmTySendMsg);
6141 }
6142
6143 //===----------------------------------------------------------------------===//
6144 // v_interp
6145 //===----------------------------------------------------------------------===//
6146
6147 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
6148 StringRef Str;
6149 SMLoc S = getLoc();
6150
6151 if (!parseId(Str))
6152 return MatchOperand_NoMatch;
6153
6154 int Slot = StringSwitch<int>(Str)
6155 .Case("p10", 0)
6156 .Case("p20", 1)
6157 .Case("p0", 2)
6158 .Default(-1);
6159
6160 if (Slot == -1) {
6161 Error(S, "invalid interpolation slot");
6162 return MatchOperand_ParseFail;
6163 }
6164
6165 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
6166
AMDGPUOperand::ImmTyInterpSlot)); 6167 return MatchOperand_Success; 6168 } 6169 6170 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 6171 StringRef Str; 6172 SMLoc S = getLoc(); 6173 6174 if (!parseId(Str)) 6175 return MatchOperand_NoMatch; 6176 6177 if (!Str.startswith("attr")) { 6178 Error(S, "invalid interpolation attribute"); 6179 return MatchOperand_ParseFail; 6180 } 6181 6182 StringRef Chan = Str.take_back(2); 6183 int AttrChan = StringSwitch<int>(Chan) 6184 .Case(".x", 0) 6185 .Case(".y", 1) 6186 .Case(".z", 2) 6187 .Case(".w", 3) 6188 .Default(-1); 6189 if (AttrChan == -1) { 6190 Error(S, "invalid or missing interpolation attribute channel"); 6191 return MatchOperand_ParseFail; 6192 } 6193 6194 Str = Str.drop_back(2).drop_front(4); 6195 6196 uint8_t Attr; 6197 if (Str.getAsInteger(10, Attr)) { 6198 Error(S, "invalid or missing interpolation attribute number"); 6199 return MatchOperand_ParseFail; 6200 } 6201 6202 if (Attr > 63) { 6203 Error(S, "out of bounds interpolation attribute number"); 6204 return MatchOperand_ParseFail; 6205 } 6206 6207 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 6208 6209 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 6210 AMDGPUOperand::ImmTyInterpAttr)); 6211 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 6212 AMDGPUOperand::ImmTyAttrChan)); 6213 return MatchOperand_Success; 6214 } 6215 6216 //===----------------------------------------------------------------------===// 6217 // exp 6218 //===----------------------------------------------------------------------===// 6219 6220 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6221 using namespace llvm::AMDGPU::Exp; 6222 6223 StringRef Str; 6224 SMLoc S = getLoc(); 6225 6226 if (!parseId(Str)) 6227 return MatchOperand_NoMatch; 6228 6229 unsigned Id = getTgtId(Str); 6230 if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) { 6231 Error(S, (Id == ET_INVALID) ? 
6232 "invalid exp target" : 6233 "exp target is not supported on this GPU"); 6234 return MatchOperand_ParseFail; 6235 } 6236 6237 Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S, 6238 AMDGPUOperand::ImmTyExpTgt)); 6239 return MatchOperand_Success; 6240 } 6241 6242 //===----------------------------------------------------------------------===// 6243 // parser helpers 6244 //===----------------------------------------------------------------------===// 6245 6246 bool 6247 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6248 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6249 } 6250 6251 bool 6252 AMDGPUAsmParser::isId(const StringRef Id) const { 6253 return isId(getToken(), Id); 6254 } 6255 6256 bool 6257 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6258 return getTokenKind() == Kind; 6259 } 6260 6261 bool 6262 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6263 if (isId(Id)) { 6264 lex(); 6265 return true; 6266 } 6267 return false; 6268 } 6269 6270 bool 6271 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) { 6272 if (isToken(AsmToken::Identifier)) { 6273 StringRef Tok = getTokenStr(); 6274 if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) { 6275 lex(); 6276 return true; 6277 } 6278 } 6279 return false; 6280 } 6281 6282 bool 6283 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6284 if (isId(Id) && peekToken().is(Kind)) { 6285 lex(); 6286 lex(); 6287 return true; 6288 } 6289 return false; 6290 } 6291 6292 bool 6293 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6294 if (isToken(Kind)) { 6295 lex(); 6296 return true; 6297 } 6298 return false; 6299 } 6300 6301 bool 6302 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6303 const StringRef ErrMsg) { 6304 if (!trySkipToken(Kind)) { 6305 Error(getLoc(), ErrMsg); 6306 return false; 6307 } 6308 return true; 6309 } 6310 6311 bool 6312 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6313 SMLoc S = getLoc(); 6314 6315 const MCExpr *Expr; 6316 if (Parser.parseExpression(Expr)) 6317 return false; 6318 6319 if (Expr->evaluateAsAbsolute(Imm)) 6320 return true; 6321 6322 if (Expected.empty()) { 6323 Error(S, "expected absolute expression"); 6324 } else { 6325 Error(S, Twine("expected ", Expected) + 6326 Twine(" or an absolute expression")); 6327 } 6328 return false; 6329 } 6330 6331 bool 6332 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6333 SMLoc S = getLoc(); 6334 6335 const MCExpr *Expr; 6336 if (Parser.parseExpression(Expr)) 6337 return false; 6338 6339 int64_t IntVal; 6340 if (Expr->evaluateAsAbsolute(IntVal)) { 6341 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6342 } else { 6343 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6344 } 6345 return true; 6346 } 6347 6348 bool 6349 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6350 if (isToken(AsmToken::String)) { 6351 Val = getToken().getStringContents(); 6352 lex(); 6353 return true; 6354 } else { 6355 Error(getLoc(), ErrMsg); 6356 return false; 6357 } 6358 } 6359 6360 bool 6361 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6362 if (isToken(AsmToken::Identifier)) { 6363 Val = getTokenStr(); 6364 lex(); 6365 return true; 6366 } else { 6367 if (!ErrMsg.empty()) 6368 Error(getLoc(), ErrMsg); 6369 return false; 6370 } 6371 } 6372 6373 AsmToken 6374 AMDGPUAsmParser::getToken() const { 6375 return Parser.getTok(); 6376 } 6377 6378 AsmToken 6379 
AMDGPUAsmParser::peekToken() { 6380 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6381 } 6382 6383 void 6384 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6385 auto TokCount = getLexer().peekTokens(Tokens); 6386 6387 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6388 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6389 } 6390 6391 AsmToken::TokenKind 6392 AMDGPUAsmParser::getTokenKind() const { 6393 return getLexer().getKind(); 6394 } 6395 6396 SMLoc 6397 AMDGPUAsmParser::getLoc() const { 6398 return getToken().getLoc(); 6399 } 6400 6401 StringRef 6402 AMDGPUAsmParser::getTokenStr() const { 6403 return getToken().getString(); 6404 } 6405 6406 void 6407 AMDGPUAsmParser::lex() { 6408 Parser.Lex(); 6409 } 6410 6411 SMLoc 6412 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6413 const OperandVector &Operands) const { 6414 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6415 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6416 if (Test(Op)) 6417 return Op.getStartLoc(); 6418 } 6419 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6420 } 6421 6422 SMLoc 6423 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6424 const OperandVector &Operands) const { 6425 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6426 return getOperandLoc(Test, Operands); 6427 } 6428 6429 SMLoc 6430 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6431 const OperandVector &Operands) const { 6432 auto Test = [=](const AMDGPUOperand& Op) { 6433 return Op.isRegKind() && Op.getReg() == Reg; 6434 }; 6435 return getOperandLoc(Test, Operands); 6436 } 6437 6438 SMLoc 6439 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6440 auto Test = [](const AMDGPUOperand& Op) { 6441 return Op.IsImmKindLiteral() || Op.isExpr(); 6442 }; 6443 return getOperandLoc(Test, Operands); 6444 } 6445 6446 SMLoc 6447 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6448 auto Test = [](const AMDGPUOperand& Op) { 6449 return Op.isImmKindConst(); 6450 }; 6451 return getOperandLoc(Test, Operands); 6452 } 6453 6454 //===----------------------------------------------------------------------===// 6455 // swizzle 6456 //===----------------------------------------------------------------------===// 6457 6458 LLVM_READNONE 6459 static unsigned 6460 encodeBitmaskPerm(const unsigned AndMask, 6461 const unsigned OrMask, 6462 const unsigned XorMask) { 6463 using namespace llvm::AMDGPU::Swizzle; 6464 6465 return BITMASK_PERM_ENC | 6466 (AndMask << BITMASK_AND_SHIFT) | 6467 (OrMask << BITMASK_OR_SHIFT) | 6468 (XorMask << BITMASK_XOR_SHIFT); 6469 } 6470 6471 bool 6472 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6473 const unsigned MinVal, 6474 const unsigned MaxVal, 6475 const StringRef ErrMsg, 6476 SMLoc &Loc) { 6477 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6478 return false; 6479 } 6480 Loc = getLoc(); 6481 if (!parseExpr(Op)) { 6482 return false; 6483 } 6484 if (Op < MinVal || Op > MaxVal) { 6485 Error(Loc, ErrMsg); 6486 return false; 6487 } 6488 6489 return true; 6490 } 6491 6492 bool 6493 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6494 const unsigned MinVal, 6495 const unsigned MaxVal, 6496 const StringRef ErrMsg) { 6497 SMLoc Loc; 6498 for (unsigned i = 0; i < OpNum; ++i) { 6499 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6500 return false; 6501 } 6502 6503 return true; 6504 } 6505 6506 bool 6507 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t 
&Imm) { 6508 using namespace llvm::AMDGPU::Swizzle; 6509 6510 int64_t Lane[LANE_NUM]; 6511 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6512 "expected a 2-bit lane id")) { 6513 Imm = QUAD_PERM_ENC; 6514 for (unsigned I = 0; I < LANE_NUM; ++I) { 6515 Imm |= Lane[I] << (LANE_SHIFT * I); 6516 } 6517 return true; 6518 } 6519 return false; 6520 } 6521 6522 bool 6523 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6524 using namespace llvm::AMDGPU::Swizzle; 6525 6526 SMLoc Loc; 6527 int64_t GroupSize; 6528 int64_t LaneIdx; 6529 6530 if (!parseSwizzleOperand(GroupSize, 6531 2, 32, 6532 "group size must be in the interval [2,32]", 6533 Loc)) { 6534 return false; 6535 } 6536 if (!isPowerOf2_64(GroupSize)) { 6537 Error(Loc, "group size must be a power of two"); 6538 return false; 6539 } 6540 if (parseSwizzleOperand(LaneIdx, 6541 0, GroupSize - 1, 6542 "lane id must be in the interval [0,group size - 1]", 6543 Loc)) { 6544 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6545 return true; 6546 } 6547 return false; 6548 } 6549 6550 bool 6551 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6552 using namespace llvm::AMDGPU::Swizzle; 6553 6554 SMLoc Loc; 6555 int64_t GroupSize; 6556 6557 if (!parseSwizzleOperand(GroupSize, 6558 2, 32, 6559 "group size must be in the interval [2,32]", 6560 Loc)) { 6561 return false; 6562 } 6563 if (!isPowerOf2_64(GroupSize)) { 6564 Error(Loc, "group size must be a power of two"); 6565 return false; 6566 } 6567 6568 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6569 return true; 6570 } 6571 6572 bool 6573 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6574 using namespace llvm::AMDGPU::Swizzle; 6575 6576 SMLoc Loc; 6577 int64_t GroupSize; 6578 6579 if (!parseSwizzleOperand(GroupSize, 6580 1, 16, 6581 "group size must be in the interval [1,16]", 6582 Loc)) { 6583 return false; 6584 } 6585 if (!isPowerOf2_64(GroupSize)) { 6586 Error(Loc, "group size must be a power of two"); 6587 return false; 6588 } 6589 6590 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6591 return true; 6592 } 6593 6594 bool 6595 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6596 using namespace llvm::AMDGPU::Swizzle; 6597 6598 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6599 return false; 6600 } 6601 6602 StringRef Ctl; 6603 SMLoc StrLoc = getLoc(); 6604 if (!parseString(Ctl)) { 6605 return false; 6606 } 6607 if (Ctl.size() != BITMASK_WIDTH) { 6608 Error(StrLoc, "expected a 5-character mask"); 6609 return false; 6610 } 6611 6612 unsigned AndMask = 0; 6613 unsigned OrMask = 0; 6614 unsigned XorMask = 0; 6615 6616 for (size_t i = 0; i < Ctl.size(); ++i) { 6617 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6618 switch(Ctl[i]) { 6619 default: 6620 Error(StrLoc, "invalid mask"); 6621 return false; 6622 case '0': 6623 break; 6624 case '1': 6625 OrMask |= Mask; 6626 break; 6627 case 'p': 6628 AndMask |= Mask; 6629 break; 6630 case 'i': 6631 AndMask |= Mask; 6632 XorMask |= Mask; 6633 break; 6634 } 6635 } 6636 6637 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6638 return true; 6639 } 6640 6641 bool 6642 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6643 6644 SMLoc OffsetLoc = getLoc(); 6645 6646 if (!parseExpr(Imm, "a swizzle macro")) { 6647 return false; 6648 } 6649 if (!isUInt<16>(Imm)) { 6650 Error(OffsetLoc, "expected a 16-bit offset"); 6651 return false; 6652 } 6653 return true; 6654 } 6655 6656 bool 6657 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6658 using namespace llvm::AMDGPU::Swizzle; 6659 6660 if 
(skipToken(AsmToken::LParen, "expected a left parentheses")) { 6661 6662 SMLoc ModeLoc = getLoc(); 6663 bool Ok = false; 6664 6665 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6666 Ok = parseSwizzleQuadPerm(Imm); 6667 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6668 Ok = parseSwizzleBitmaskPerm(Imm); 6669 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6670 Ok = parseSwizzleBroadcast(Imm); 6671 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6672 Ok = parseSwizzleSwap(Imm); 6673 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6674 Ok = parseSwizzleReverse(Imm); 6675 } else { 6676 Error(ModeLoc, "expected a swizzle mode"); 6677 } 6678 6679 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6680 } 6681 6682 return false; 6683 } 6684 6685 OperandMatchResultTy 6686 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6687 SMLoc S = getLoc(); 6688 int64_t Imm = 0; 6689 6690 if (trySkipId("offset")) { 6691 6692 bool Ok = false; 6693 if (skipToken(AsmToken::Colon, "expected a colon")) { 6694 if (trySkipId("swizzle")) { 6695 Ok = parseSwizzleMacro(Imm); 6696 } else { 6697 Ok = parseSwizzleOffset(Imm); 6698 } 6699 } 6700 6701 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6702 6703 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6704 } else { 6705 // Swizzle "offset" operand is optional. 6706 // If it is omitted, try parsing other optional operands. 6707 return parseOptionalOpr(Operands); 6708 } 6709 } 6710 6711 bool 6712 AMDGPUOperand::isSwizzle() const { 6713 return isImmTy(ImmTySwizzle); 6714 } 6715 6716 //===----------------------------------------------------------------------===// 6717 // VGPR Index Mode 6718 //===----------------------------------------------------------------------===// 6719 6720 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6721 6722 using namespace llvm::AMDGPU::VGPRIndexMode; 6723 6724 if (trySkipToken(AsmToken::RParen)) { 6725 return OFF; 6726 } 6727 6728 int64_t Imm = 0; 6729 6730 while (true) { 6731 unsigned Mode = 0; 6732 SMLoc S = getLoc(); 6733 6734 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6735 if (trySkipId(IdSymbolic[ModeId])) { 6736 Mode = 1 << ModeId; 6737 break; 6738 } 6739 } 6740 6741 if (Mode == 0) { 6742 Error(S, (Imm == 0)? 
6743 "expected a VGPR index mode or a closing parenthesis" : 6744 "expected a VGPR index mode"); 6745 return UNDEF; 6746 } 6747 6748 if (Imm & Mode) { 6749 Error(S, "duplicate VGPR index mode"); 6750 return UNDEF; 6751 } 6752 Imm |= Mode; 6753 6754 if (trySkipToken(AsmToken::RParen)) 6755 break; 6756 if (!skipToken(AsmToken::Comma, 6757 "expected a comma or a closing parenthesis")) 6758 return UNDEF; 6759 } 6760 6761 return Imm; 6762 } 6763 6764 OperandMatchResultTy 6765 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6766 6767 using namespace llvm::AMDGPU::VGPRIndexMode; 6768 6769 int64_t Imm = 0; 6770 SMLoc S = getLoc(); 6771 6772 if (trySkipId("gpr_idx", AsmToken::LParen)) { 6773 Imm = parseGPRIdxMacro(); 6774 if (Imm == UNDEF) 6775 return MatchOperand_ParseFail; 6776 } else { 6777 if (getParser().parseAbsoluteExpression(Imm)) 6778 return MatchOperand_ParseFail; 6779 if (Imm < 0 || !isUInt<4>(Imm)) { 6780 Error(S, "invalid immediate: only 4-bit values are legal"); 6781 return MatchOperand_ParseFail; 6782 } 6783 } 6784 6785 Operands.push_back( 6786 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6787 return MatchOperand_Success; 6788 } 6789 6790 bool AMDGPUOperand::isGPRIdxMode() const { 6791 return isImmTy(ImmTyGprIdxMode); 6792 } 6793 6794 //===----------------------------------------------------------------------===// 6795 // sopp branch targets 6796 //===----------------------------------------------------------------------===// 6797 6798 OperandMatchResultTy 6799 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6800 6801 // Make sure we are not parsing something 6802 // that looks like a label or an expression but is not. 6803 // This will improve error messages. 6804 if (isRegister() || isModifier()) 6805 return MatchOperand_NoMatch; 6806 6807 if (!parseExpr(Operands)) 6808 return MatchOperand_ParseFail; 6809 6810 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6811 assert(Opr.isImm() || Opr.isExpr()); 6812 SMLoc Loc = Opr.getStartLoc(); 6813 6814 // Currently we do not support arbitrary expressions as branch targets. 6815 // Only labels and absolute expressions are accepted. 
6816 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6817 Error(Loc, "expected an absolute expression or a label"); 6818 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6819 Error(Loc, "expected a 16-bit signed jump offset"); 6820 } 6821 6822 return MatchOperand_Success; 6823 } 6824 6825 //===----------------------------------------------------------------------===// 6826 // Boolean holding registers 6827 //===----------------------------------------------------------------------===// 6828 6829 OperandMatchResultTy 6830 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6831 return parseReg(Operands); 6832 } 6833 6834 //===----------------------------------------------------------------------===// 6835 // mubuf 6836 //===----------------------------------------------------------------------===// 6837 6838 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6839 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6840 } 6841 6842 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSCCB() const { 6843 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySCCB); 6844 } 6845 6846 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6847 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6848 } 6849 6850 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6851 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6852 } 6853 6854 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6855 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6856 } 6857 6858 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6859 const OperandVector &Operands, 6860 bool IsAtomic, 6861 bool IsAtomicReturn, 6862 bool IsLds) { 6863 bool IsLdsOpcode = IsLds; 6864 bool HasLdsModifier = false; 6865 OptionalImmIndexMap OptionalIdx; 6866 assert(IsAtomicReturn ? IsAtomic : true); 6867 unsigned FirstOperandIdx = 1; 6868 6869 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6870 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6871 6872 // Add the register arguments 6873 if (Op.isReg()) { 6874 Op.addRegOperands(Inst, 1); 6875 // Insert a tied src for atomic return dst. 6876 // This cannot be postponed as subsequent calls to 6877 // addImmOperands rely on correct number of MC operands. 6878 if (IsAtomicReturn && i == FirstOperandIdx) 6879 Op.addRegOperands(Inst, 1); 6880 continue; 6881 } 6882 6883 // Handle the case where soffset is an immediate 6884 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6885 Op.addImmOperands(Inst, 1); 6886 continue; 6887 } 6888 6889 HasLdsModifier |= Op.isLDS(); 6890 6891 // Handle tokens like 'offen' which are sometimes hard-coded into the 6892 // asm string. There are no MCInst operands for these. 6893 if (Op.isToken()) { 6894 continue; 6895 } 6896 assert(Op.isImm()); 6897 6898 // Handle optional arguments 6899 OptionalIdx[Op.getImmTy()] = i; 6900 } 6901 6902 // This is a workaround for an llvm quirk which may result in an 6903 // incorrect instruction selection. Lds and non-lds versions of 6904 // MUBUF instructions are identical except that lds versions 6905 // have mandatory 'lds' modifier. However this modifier follows 6906 // optional modifiers and llvm asm matcher regards this 'lds' 6907 // modifier as an optional one. As a result, an lds version 6908 // of opcode may be selected even if it has no 'lds' modifier. 
6909 if (IsLdsOpcode && !HasLdsModifier) { 6910 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6911 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6912 Inst.setOpcode(NoLdsOpcode); 6913 IsLdsOpcode = false; 6914 } 6915 } 6916 6917 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6918 if (!IsAtomic || IsAtomicReturn) { 6919 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC, 6920 IsAtomicReturn ? -1 : 0); 6921 } 6922 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6923 6924 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6925 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6926 } 6927 6928 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6929 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB); 6931 } 6932 6933 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6934 OptionalImmIndexMap OptionalIdx; 6935 6936 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6937 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6938 6939 // Add the register arguments 6940 if (Op.isReg()) { 6941 Op.addRegOperands(Inst, 1); 6942 continue; 6943 } 6944 6945 // Handle the case where soffset is an immediate 6946 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6947 Op.addImmOperands(Inst, 1); 6948 continue; 6949 } 6950 6951 // Handle tokens like 'offen' which are sometimes hard-coded into the 6952 // asm string. There are no MCInst operands for these. 6953 if (Op.isToken()) { 6954 continue; 6955 } 6956 assert(Op.isImm()); 6957 6958 // Handle optional arguments 6959 OptionalIdx[Op.getImmTy()] = i; 6960 } 6961 6962 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6963 AMDGPUOperand::ImmTyOffset); 6964 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6966 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6967 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6968 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6969 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ); 6970 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySCCB); 6971 } 6972 6973 //===----------------------------------------------------------------------===// 6974 // mimg 6975 //===----------------------------------------------------------------------===// 6976 6977 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6978 bool IsAtomic) { 6979 unsigned I = 1; 6980 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6981 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6982 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6983 } 6984 6985 if (IsAtomic) { 6986 // Add src, same as dst 6987 assert(Desc.getNumDefs() == 1); 6988 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6989 } 6990 6991 OptionalImmIndexMap OptionalIdx; 6992 6993 for (unsigned E = Operands.size(); I != E; ++I) { 6994 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6995 6996 // Add the register arguments 6997 if (Op.isReg()) { 6998 Op.addRegOperands(Inst, 1); 6999 } else if (Op.isImmModifier()) { 7000 OptionalIdx[Op.getImmTy()] = I; 7001 } else if (!Op.isToken()) { 
7002 llvm_unreachable("unexpected operand type"); 7003 } 7004 } 7005 7006 bool IsGFX10Plus = isGFX10Plus(); 7007 7008 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 7009 if (IsGFX10Plus) 7010 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 7011 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 7012 7013 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::sccb) != -1) 7014 addOptionalImmOperand(Inst, Operands, OptionalIdx, 7015 AMDGPUOperand::ImmTySCCB); 7016 7017 if (IsGFX10Plus) 7018 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 7019 7020 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 7021 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 7022 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 7023 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1) 7024 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 7025 if (IsGFX10Plus) 7026 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 7027 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 7028 if (!IsGFX10Plus) 7029 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 7030 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 7031 } 7032 7033 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 7034 cvtMIMG(Inst, Operands, true); 7035 } 7036 7037 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 7038 const OperandVector &Operands) { 7039 for (unsigned I = 1; I < Operands.size(); ++I) { 7040 auto &Operand = (AMDGPUOperand &)*Operands[I]; 7041 if (Operand.isReg()) 7042 Operand.addRegOperands(Inst, 1); 7043 } 7044 7045 Inst.addOperand(MCOperand::createImm(1)); // a16 7046 } 7047 7048 //===----------------------------------------------------------------------===// 7049 // smrd 7050 //===----------------------------------------------------------------------===// 7051 7052 bool AMDGPUOperand::isSMRDOffset8() const { 7053 return isImm() && isUInt<8>(getImm()); 7054 } 7055 7056 bool AMDGPUOperand::isSMEMOffset() const { 7057 return isImm(); // Offset range is checked later by validator. 7058 } 7059 7060 bool AMDGPUOperand::isSMRDLiteralOffset() const { 7061 // 32-bit literals are only supported on CI and we only want to use them 7062 // when the offset is > 8-bits. 
7063 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 7064 } 7065 7066 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 7067 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7068 } 7069 7070 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 7071 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7072 } 7073 7074 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 7075 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7076 } 7077 7078 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 7079 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 7080 } 7081 7082 //===----------------------------------------------------------------------===// 7083 // vop3 7084 //===----------------------------------------------------------------------===// 7085 7086 static bool ConvertOmodMul(int64_t &Mul) { 7087 if (Mul != 1 && Mul != 2 && Mul != 4) 7088 return false; 7089 7090 Mul >>= 1; 7091 return true; 7092 } 7093 7094 static bool ConvertOmodDiv(int64_t &Div) { 7095 if (Div == 1) { 7096 Div = 0; 7097 return true; 7098 } 7099 7100 if (Div == 2) { 7101 Div = 3; 7102 return true; 7103 } 7104 7105 return false; 7106 } 7107 7108 // Both bound_ctrl:0 and bound_ctrl:1 are encoded as 1. 7109 // This is intentional and ensures compatibility with sp3. 7110 // See bug 35397 for details. 7111 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 7112 if (BoundCtrl == 0 || BoundCtrl == 1) { 7113 BoundCtrl = 1; 7114 return true; 7115 } 7116 return false; 7117 } 7118 7119 // Note: the order in this table matches the order of operands in AsmString. 7120 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 7121 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 7122 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 7123 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 7124 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 7125 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 7126 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 7127 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 7128 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 7129 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 7130 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 7131 {"scc", AMDGPUOperand::ImmTySCCB, true, nullptr}, 7132 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 7133 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 7134 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 7135 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 7136 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7137 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 7138 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 7139 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 7140 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 7141 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 7142 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 7143 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 7144 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 7145 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 7146 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 7147 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 7148 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 7149 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 7150 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, 
ConvertBoundCtrl},
7151 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr},
7152 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
7153 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
7154 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
7155 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
7156 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
7157 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
7158 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
7159 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
7160 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
7161 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
7162 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
7163 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
7164 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
7165 };
7166
7167 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
7168
7169 OperandMatchResultTy res = parseOptionalOpr(Operands);
7170
7171 // This is a hack to enable hardcoded mandatory operands which follow
7172 // optional operands.
7173 //
7174 // The current design assumes that all operands after the first optional operand
7175 // are also optional. However, the implementation of some instructions violates
7176 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
7177 //
7178 // To alleviate this problem, we have to (implicitly) parse extra operands
7179 // to make sure the autogenerated parser of custom operands never hits hardcoded
7180 // mandatory operands.
7181
7182 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
7183 if (res != MatchOperand_Success ||
7184 isToken(AsmToken::EndOfStatement))
7185 break;
7186
7187 trySkipToken(AsmToken::Comma);
7188 res = parseOptionalOpr(Operands);
7189 }
7190
7191 return res;
7192 }
7193
7194 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
7195 OperandMatchResultTy res;
7196 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
7197 // try to parse any optional operand here
7198 if (Op.IsBit) {
7199 res = parseNamedBit(Op.Name, Operands, Op.Type);
7200 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
7201 res = parseOModOperand(Operands);
7202 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
7203 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
7204 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
7205 res = parseSDWASel(Operands, Op.Name, Op.Type);
7206 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
7207 res = parseSDWADstUnused(Operands);
7208 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
7209 Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
7210 Op.Type == AMDGPUOperand::ImmTyNegLo ||
7211 Op.Type == AMDGPUOperand::ImmTyNegHi) {
7212 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
7213 Op.ConvertResult);
7214 } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
7215 res = parseDim(Operands);
7216 } else {
7217 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
7218 }
7219 if (res != MatchOperand_NoMatch) {
7220 return res;
7221 }
7222 }
7223 return MatchOperand_NoMatch;
7224 }
7225
7226 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
7227 StringRef Name = getTokenStr();
7228 if (Name == "mul") {
7229 return parseIntWithPrefix("mul", Operands,
7230 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
7231 }
7232
7233 if (Name == "div") {
7234 return parseIntWithPrefix("div", Operands,
7235
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is an input-modifiers operand.
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. It is not the last operand.
      && Desc.NumOperands > (OpNum + 1)
      // 3. The next operand has a register class.
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. The next operand is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers.
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc,
                                  Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers.
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // they have a src2 register operand that is tied to the dst operand.
  // The assembler does not allow modifiers for this operand, so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if this were a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ?
-1 : 0; 7411 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 7412 DefaultVal); 7413 } 7414 7415 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 7416 if (NegLoIdx != -1) { 7417 assert(IsPacked); 7418 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 7419 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 7420 } 7421 7422 const int Ops[] = { AMDGPU::OpName::src0, 7423 AMDGPU::OpName::src1, 7424 AMDGPU::OpName::src2 }; 7425 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 7426 AMDGPU::OpName::src1_modifiers, 7427 AMDGPU::OpName::src2_modifiers }; 7428 7429 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7430 7431 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7432 unsigned OpSelHi = 0; 7433 unsigned NegLo = 0; 7434 unsigned NegHi = 0; 7435 7436 if (OpSelHiIdx != -1) { 7437 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 7438 } 7439 7440 if (NegLoIdx != -1) { 7441 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 7442 NegLo = Inst.getOperand(NegLoIdx).getImm(); 7443 NegHi = Inst.getOperand(NegHiIdx).getImm(); 7444 } 7445 7446 for (int J = 0; J < 3; ++J) { 7447 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 7448 if (OpIdx == -1) 7449 break; 7450 7451 uint32_t ModVal = 0; 7452 7453 if ((OpSel & (1 << J)) != 0) 7454 ModVal |= SISrcMods::OP_SEL_0; 7455 7456 if ((OpSelHi & (1 << J)) != 0) 7457 ModVal |= SISrcMods::OP_SEL_1; 7458 7459 if ((NegLo & (1 << J)) != 0) 7460 ModVal |= SISrcMods::NEG; 7461 7462 if ((NegHi & (1 << J)) != 0) 7463 ModVal |= SISrcMods::NEG_HI; 7464 7465 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 7466 7467 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 7468 } 7469 } 7470 7471 //===----------------------------------------------------------------------===// 7472 // dpp 7473 //===----------------------------------------------------------------------===// 7474 7475 bool AMDGPUOperand::isDPP8() const { 7476 return isImmTy(ImmTyDPP8); 7477 } 7478 7479 bool AMDGPUOperand::isDPPCtrl() const { 7480 using namespace AMDGPU::DPP; 7481 7482 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 7483 if (result) { 7484 int64_t Imm = getImm(); 7485 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 7486 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 7487 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 7488 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 7489 (Imm == DppCtrl::WAVE_SHL1) || 7490 (Imm == DppCtrl::WAVE_ROL1) || 7491 (Imm == DppCtrl::WAVE_SHR1) || 7492 (Imm == DppCtrl::WAVE_ROR1) || 7493 (Imm == DppCtrl::ROW_MIRROR) || 7494 (Imm == DppCtrl::ROW_HALF_MIRROR) || 7495 (Imm == DppCtrl::BCAST15) || 7496 (Imm == DppCtrl::BCAST31) || 7497 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 7498 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 7499 } 7500 return false; 7501 } 7502 7503 //===----------------------------------------------------------------------===// 7504 // mAI 7505 //===----------------------------------------------------------------------===// 7506 7507 bool AMDGPUOperand::isBLGP() const { 7508 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 7509 } 7510 7511 bool AMDGPUOperand::isCBSZ() const { 7512 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 7513 } 7514 7515 bool AMDGPUOperand::isABID() 
const { 7516 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 7517 } 7518 7519 bool AMDGPUOperand::isS16Imm() const { 7520 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 7521 } 7522 7523 bool AMDGPUOperand::isU16Imm() const { 7524 return isImm() && isUInt<16>(getImm()); 7525 } 7526 7527 //===----------------------------------------------------------------------===// 7528 // dim 7529 //===----------------------------------------------------------------------===// 7530 7531 bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) { 7532 // We want to allow "dim:1D" etc., 7533 // but the initial 1 is tokenized as an integer. 7534 std::string Token; 7535 if (isToken(AsmToken::Integer)) { 7536 SMLoc Loc = getToken().getEndLoc(); 7537 Token = std::string(getTokenStr()); 7538 lex(); 7539 if (getLoc() != Loc) 7540 return false; 7541 } 7542 7543 StringRef Suffix; 7544 if (!parseId(Suffix)) 7545 return false; 7546 Token += Suffix; 7547 7548 StringRef DimId = Token; 7549 if (DimId.startswith("SQ_RSRC_IMG_")) 7550 DimId = DimId.drop_front(12); 7551 7552 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 7553 if (!DimInfo) 7554 return false; 7555 7556 Encoding = DimInfo->Encoding; 7557 return true; 7558 } 7559 7560 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 7561 if (!isGFX10Plus()) 7562 return MatchOperand_NoMatch; 7563 7564 SMLoc S = getLoc(); 7565 7566 if (!trySkipId("dim", AsmToken::Colon)) 7567 return MatchOperand_NoMatch; 7568 7569 unsigned Encoding; 7570 SMLoc Loc = getLoc(); 7571 if (!parseDimId(Encoding)) { 7572 Error(Loc, "invalid dim value"); 7573 return MatchOperand_ParseFail; 7574 } 7575 7576 Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S, 7577 AMDGPUOperand::ImmTyDim)); 7578 return MatchOperand_Success; 7579 } 7580 7581 //===----------------------------------------------------------------------===// 7582 // dpp 7583 //===----------------------------------------------------------------------===// 7584 7585 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7586 SMLoc S = getLoc(); 7587 7588 if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon)) 7589 return MatchOperand_NoMatch; 7590 7591 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7592 7593 int64_t Sels[8]; 7594 7595 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7596 return MatchOperand_ParseFail; 7597 7598 for (size_t i = 0; i < 8; ++i) { 7599 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7600 return MatchOperand_ParseFail; 7601 7602 SMLoc Loc = getLoc(); 7603 if (getParser().parseAbsoluteExpression(Sels[i])) 7604 return MatchOperand_ParseFail; 7605 if (0 > Sels[i] || 7 < Sels[i]) { 7606 Error(Loc, "expected a 3-bit value"); 7607 return MatchOperand_ParseFail; 7608 } 7609 } 7610 7611 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7612 return MatchOperand_ParseFail; 7613 7614 unsigned DPP8 = 0; 7615 for (size_t i = 0; i < 8; ++i) 7616 DPP8 |= (Sels[i] << (i * 3)); 7617 7618 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7619 return MatchOperand_Success; 7620 } 7621 7622 bool 7623 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl, 7624 const OperandVector &Operands) { 7625 if (Ctrl == "row_newbcast") 7626 return isGFX90A(); 7627 7628 // DPP64 is supported for row_newbcast only. 
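  // If the first register operand (Operands[1]) is a multi-dword register
  // (i.e. it has a sub1 subregister), this is a 64-bit DPP operation, and any
  // control other than row_newbcast (already accepted above) is rejected by
  // the check below.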
7629 const MCRegisterInfo *MRI = getMRI(); 7630 if (Operands.size() > 2 && Operands[1]->isReg() && 7631 MRI->getSubReg(Operands[1]->getReg(), AMDGPU::sub1)) 7632 return false; 7633 7634 if (Ctrl == "row_share" || 7635 Ctrl == "row_xmask") 7636 return isGFX10Plus(); 7637 7638 if (Ctrl == "wave_shl" || 7639 Ctrl == "wave_shr" || 7640 Ctrl == "wave_rol" || 7641 Ctrl == "wave_ror" || 7642 Ctrl == "row_bcast") 7643 return isVI() || isGFX9(); 7644 7645 return Ctrl == "row_mirror" || 7646 Ctrl == "row_half_mirror" || 7647 Ctrl == "quad_perm" || 7648 Ctrl == "row_shl" || 7649 Ctrl == "row_shr" || 7650 Ctrl == "row_ror"; 7651 } 7652 7653 int64_t 7654 AMDGPUAsmParser::parseDPPCtrlPerm() { 7655 // quad_perm:[%d,%d,%d,%d] 7656 7657 if (!skipToken(AsmToken::LBrac, "expected an opening square bracket")) 7658 return -1; 7659 7660 int64_t Val = 0; 7661 for (int i = 0; i < 4; ++i) { 7662 if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma")) 7663 return -1; 7664 7665 int64_t Temp; 7666 SMLoc Loc = getLoc(); 7667 if (getParser().parseAbsoluteExpression(Temp)) 7668 return -1; 7669 if (Temp < 0 || Temp > 3) { 7670 Error(Loc, "expected a 2-bit value"); 7671 return -1; 7672 } 7673 7674 Val += (Temp << i * 2); 7675 } 7676 7677 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 7678 return -1; 7679 7680 return Val; 7681 } 7682 7683 int64_t 7684 AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) { 7685 using namespace AMDGPU::DPP; 7686 7687 // sel:%d 7688 7689 int64_t Val; 7690 SMLoc Loc = getLoc(); 7691 7692 if (getParser().parseAbsoluteExpression(Val)) 7693 return -1; 7694 7695 struct DppCtrlCheck { 7696 int64_t Ctrl; 7697 int Lo; 7698 int Hi; 7699 }; 7700 7701 DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl) 7702 .Case("wave_shl", {DppCtrl::WAVE_SHL1, 1, 1}) 7703 .Case("wave_rol", {DppCtrl::WAVE_ROL1, 1, 1}) 7704 .Case("wave_shr", {DppCtrl::WAVE_SHR1, 1, 1}) 7705 .Case("wave_ror", {DppCtrl::WAVE_ROR1, 1, 1}) 7706 .Case("row_shl", {DppCtrl::ROW_SHL0, 1, 15}) 7707 .Case("row_shr", {DppCtrl::ROW_SHR0, 1, 15}) 7708 .Case("row_ror", {DppCtrl::ROW_ROR0, 1, 15}) 7709 .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15}) 7710 .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15}) 7711 .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15}) 7712 .Default({-1, 0, 0}); 7713 7714 bool Valid; 7715 if (Check.Ctrl == -1) { 7716 Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31)); 7717 Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31; 7718 } else { 7719 Valid = Check.Lo <= Val && Val <= Check.Hi; 7720 Val = (Check.Lo == Check.Hi) ? 
                              Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}

OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  if (!isToken(AsmToken::Identifier) ||
      !isSupportedDPPCtrl(getTokenStr(), Operands))
    return MatchOperand_NoMatch;

  SMLoc S = getLoc();
  int64_t Val = -1;
  StringRef Ctrl;

  parseId(Ctrl);

  if (Ctrl == "row_mirror") {
    Val = DppCtrl::ROW_MIRROR;
  } else if (Ctrl == "row_half_mirror") {
    Val = DppCtrl::ROW_HALF_MIRROR;
  } else {
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (Ctrl == "quad_perm") {
        Val = parseDPPCtrlPerm();
      } else {
        Val = parseDPPCtrlSel(Ctrl);
      }
    }
  }

  if (Val == -1)
    return MatchOperand_ParseFail;

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}

void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied 'old' or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments.
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32, ...) dpp uses the "vcc" token.
      // Skip it.
7810 continue; 7811 } 7812 7813 if (IsDPP8) { 7814 if (Op.isDPP8()) { 7815 Op.addImmOperands(Inst, 1); 7816 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7817 Op.addRegWithFPInputModsOperands(Inst, 2); 7818 } else if (Op.isFI()) { 7819 Fi = Op.getImm(); 7820 } else if (Op.isReg()) { 7821 Op.addRegOperands(Inst, 1); 7822 } else { 7823 llvm_unreachable("Invalid operand type"); 7824 } 7825 } else { 7826 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7827 Op.addRegWithFPInputModsOperands(Inst, 2); 7828 } else if (Op.isDPPCtrl()) { 7829 Op.addImmOperands(Inst, 1); 7830 } else if (Op.isImm()) { 7831 // Handle optional arguments 7832 OptionalIdx[Op.getImmTy()] = I; 7833 } else { 7834 llvm_unreachable("Invalid operand type"); 7835 } 7836 } 7837 } 7838 7839 if (IsDPP8) { 7840 using namespace llvm::AMDGPU::DPP; 7841 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7842 } else { 7843 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7844 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7845 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7846 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7847 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7848 } 7849 } 7850 } 7851 7852 //===----------------------------------------------------------------------===// 7853 // sdwa 7854 //===----------------------------------------------------------------------===// 7855 7856 OperandMatchResultTy 7857 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7858 AMDGPUOperand::ImmTy Type) { 7859 using namespace llvm::AMDGPU::SDWA; 7860 7861 SMLoc S = getLoc(); 7862 StringRef Value; 7863 OperandMatchResultTy res; 7864 7865 SMLoc StringLoc; 7866 res = parseStringWithPrefix(Prefix, Value, StringLoc); 7867 if (res != MatchOperand_Success) { 7868 return res; 7869 } 7870 7871 int64_t Int; 7872 Int = StringSwitch<int64_t>(Value) 7873 .Case("BYTE_0", SdwaSel::BYTE_0) 7874 .Case("BYTE_1", SdwaSel::BYTE_1) 7875 .Case("BYTE_2", SdwaSel::BYTE_2) 7876 .Case("BYTE_3", SdwaSel::BYTE_3) 7877 .Case("WORD_0", SdwaSel::WORD_0) 7878 .Case("WORD_1", SdwaSel::WORD_1) 7879 .Case("DWORD", SdwaSel::DWORD) 7880 .Default(0xffffffff); 7881 7882 if (Int == 0xffffffff) { 7883 Error(StringLoc, "invalid " + Twine(Prefix) + " value"); 7884 return MatchOperand_ParseFail; 7885 } 7886 7887 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7888 return MatchOperand_Success; 7889 } 7890 7891 OperandMatchResultTy 7892 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7893 using namespace llvm::AMDGPU::SDWA; 7894 7895 SMLoc S = getLoc(); 7896 StringRef Value; 7897 OperandMatchResultTy res; 7898 7899 SMLoc StringLoc; 7900 res = parseStringWithPrefix("dst_unused", Value, StringLoc); 7901 if (res != MatchOperand_Success) { 7902 return res; 7903 } 7904 7905 int64_t Int; 7906 Int = StringSwitch<int64_t>(Value) 7907 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 7908 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 7909 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 7910 .Default(0xffffffff); 7911 7912 if (Int == 0xffffffff) { 7913 Error(StringLoc, "invalid dst_unused value"); 7914 return MatchOperand_ParseFail; 7915 } 7916 7917 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 7918 return MatchOperand_Success; 7919 } 7920 7921 void 
AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32, ...) sdwa uses the "vcc" token as dst.
      // Skip it if it is the 2nd operand (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th operand (v_addc_u32_sdwa v1, vcc, v2, v3, vcc).
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
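      // In terms of the counts checked below: getNumOperands() == 1 means only
      // the def has been added so far (this vcc is the dst), while == 5 means
      // the def plus src0 and src1 (two slots each, with their modifiers) have
      // been added (this vcc is the carry-in source).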
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // The V_NOP_sdwa_vi/gfx9/gfx10 variants have no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type.
                        Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand but
  // expected the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() always tries to match
    // an operand as a token when isToken returns true, and when the name of
    // the expression is not a valid token, the match fails, so we need to
    // handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ?
                                     Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }