1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u; 108 return Operand; 109 } 110 111 int64_t getIntModifiersOperand() const { 112 int64_t Operand = 0; 113 Operand |= Sext ? SISrcMods::SEXT : 0u; 114 return Operand; 115 } 116 117 int64_t getModifiersOperand() const { 118 assert(!(hasFPModifiers() && hasIntModifiers()) 119 && "fp and int modifiers should not be used simultaneously"); 120 if (hasFPModifiers()) { 121 return getFPModifiersOperand(); 122 } else if (hasIntModifiers()) { 123 return getIntModifiersOperand(); 124 } else { 125 return 0; 126 } 127 } 128 129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 130 }; 131 132 enum ImmTy { 133 ImmTyNone, 134 ImmTyGDS, 135 ImmTyLDS, 136 ImmTyOffen, 137 ImmTyIdxen, 138 ImmTyAddr64, 139 ImmTyOffset, 140 ImmTyInstOffset, 141 ImmTyOffset0, 142 ImmTyOffset1, 143 ImmTyDLC, 144 ImmTyGLC, 145 ImmTySLC, 146 ImmTySWZ, 147 ImmTyTFE, 148 ImmTyD16, 149 ImmTyClampSI, 150 ImmTyOModSI, 151 ImmTyDPP8, 152 ImmTyDppCtrl, 153 ImmTyDppRowMask, 154 ImmTyDppBankMask, 155 ImmTyDppBoundCtrl, 156 ImmTyDppFi, 157 ImmTySdwaDstSel, 158 ImmTySdwaSrc0Sel, 159 ImmTySdwaSrc1Sel, 160 ImmTySdwaDstUnused, 161 ImmTyDMask, 162 ImmTyDim, 163 ImmTyUNorm, 164 ImmTyDA, 165 ImmTyR128A16, 166 ImmTyA16, 167 ImmTyLWE, 168 ImmTyExpTgt, 169 ImmTyExpCompr, 170 ImmTyExpVM, 171 ImmTyFORMAT, 172 ImmTyHwreg, 173 ImmTyOff, 174 ImmTySendMsg, 175 ImmTyInterpSlot, 176 ImmTyInterpAttr, 177 ImmTyAttrChan, 178 ImmTyOpSel, 179 ImmTyOpSelHi, 180 ImmTyNegLo, 181 ImmTyNegHi, 182 ImmTySwizzle, 183 ImmTyGprIdxMode, 184 ImmTyHigh, 185 ImmTyBLGP, 186 ImmTyCBSZ, 187 ImmTyABID, 188 ImmTyEndpgm, 189 }; 190 191 private: 192 struct TokOp { 193 const char *Data; 194 unsigned Length; 195 }; 196 197 struct ImmOp { 198 int64_t Val; 199 ImmTy Type; 200 bool IsFPImm; 201 Modifiers Mods; 202 }; 203 204 struct RegOp { 205 unsigned RegNo; 206 Modifiers Mods; 207 }; 208 209 union { 210 TokOp Tok; 211 ImmOp Imm; 212 RegOp Reg; 213 const MCExpr *Expr; 214 }; 215 216 public: 217 bool isToken() const override { 218 if (Kind == Token) 219 return true; 220 221 // When parsing operands, we can't always tell if something was meant to be 222 // a token, like 'gds', or an expression that references a global variable. 223 // In this case, we assume the string is an expression, and if we need to 224 // interpret is a token, then we treat the symbol name as the token. 
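    // For example, a trailing "gds" operand may have been parsed as a symbol
    // reference expression; by treating the symbol's name as the token, the
    // matcher can still recognize it as the intended keyword.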
225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_256RegClassID) || 282 isRegClass(AMDGPU::VReg_512RegClassID) || 283 isRegClass(AMDGPU::VReg_1024RegClassID); 284 } 285 286 bool isVReg32() const { 287 return isRegClass(AMDGPU::VGPR_32RegClassID); 288 } 289 290 bool isVReg32OrOff() const { 291 return isOff() || isVReg32(); 292 } 293 294 bool isNull() const { 295 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 296 } 297 298 bool isSDWAOperand(MVT type) const; 299 bool isSDWAFP16Operand() const; 300 bool isSDWAFP32Operand() const; 301 bool isSDWAInt16Operand() const; 302 bool isSDWAInt32Operand() const; 303 304 bool isImmTy(ImmTy ImmT) const { 305 return isImm() && Imm.Type == ImmT; 306 } 307 308 bool isImmModifier() const { 309 return isImm() && Imm.Type != ImmTyNone; 310 } 311 312 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 313 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 314 bool isDMask() const { return isImmTy(ImmTyDMask); } 315 bool isDim() const { return isImmTy(ImmTyDim); } 316 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 317 bool isDA() const { return isImmTy(ImmTyDA); } 318 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 319 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 320 bool isLWE() const { return isImmTy(ImmTyLWE); } 321 bool isOff() const { return isImmTy(ImmTyOff); } 322 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 323 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 324 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 325 bool isOffen() const { return isImmTy(ImmTyOffen); } 326 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 327 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 328 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 329 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 330 bool isOffset1() const { return isImmTy(ImmTyOffset1) && 
isUInt<8>(getImm()); } 331 332 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 333 bool isGDS() const { return isImmTy(ImmTyGDS); } 334 bool isLDS() const { return isImmTy(ImmTyLDS); } 335 bool isDLC() const { return isImmTy(ImmTyDLC); } 336 bool isGLC() const { return isImmTy(ImmTyGLC); } 337 bool isSLC() const { return isImmTy(ImmTySLC); } 338 bool isSWZ() const { return isImmTy(ImmTySWZ); } 339 bool isTFE() const { return isImmTy(ImmTyTFE); } 340 bool isD16() const { return isImmTy(ImmTyD16); } 341 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 342 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 343 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 344 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 345 bool isFI() const { return isImmTy(ImmTyDppFi); } 346 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 347 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 348 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 349 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 350 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 351 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 352 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 353 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 354 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 355 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 356 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 357 bool isHigh() const { return isImmTy(ImmTyHigh); } 358 359 bool isMod() const { 360 return isClampSI() || isOModSI(); 361 } 362 363 bool isRegOrImm() const { 364 return isReg() || isImm(); 365 } 366 367 bool isRegClass(unsigned RCID) const; 368 369 bool isInlineValue() const; 370 371 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 372 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 373 } 374 375 bool isSCSrcB16() const { 376 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 377 } 378 379 bool isSCSrcV2B16() const { 380 return isSCSrcB16(); 381 } 382 383 bool isSCSrcB32() const { 384 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 385 } 386 387 bool isSCSrcB64() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 389 } 390 391 bool isBoolReg() const; 392 393 bool isSCSrcF16() const { 394 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 395 } 396 397 bool isSCSrcV2F16() const { 398 return isSCSrcF16(); 399 } 400 401 bool isSCSrcF32() const { 402 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 403 } 404 405 bool isSCSrcF64() const { 406 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 407 } 408 409 bool isSSrcB32() const { 410 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 411 } 412 413 bool isSSrcB16() const { 414 return isSCSrcB16() || isLiteralImm(MVT::i16); 415 } 416 417 bool isSSrcV2B16() const { 418 llvm_unreachable("cannot happen"); 419 return isSSrcB16(); 420 } 421 422 bool isSSrcB64() const { 423 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 424 // See isVSrc64(). 
425 return isSCSrcB64() || isLiteralImm(MVT::i64); 426 } 427 428 bool isSSrcF32() const { 429 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 430 } 431 432 bool isSSrcF64() const { 433 return isSCSrcB64() || isLiteralImm(MVT::f64); 434 } 435 436 bool isSSrcF16() const { 437 return isSCSrcB16() || isLiteralImm(MVT::f16); 438 } 439 440 bool isSSrcV2F16() const { 441 llvm_unreachable("cannot happen"); 442 return isSSrcF16(); 443 } 444 445 bool isSSrcOrLdsB32() const { 446 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 447 isLiteralImm(MVT::i32) || isExpr(); 448 } 449 450 bool isVCSrcB32() const { 451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 452 } 453 454 bool isVCSrcB64() const { 455 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 456 } 457 458 bool isVCSrcB16() const { 459 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 460 } 461 462 bool isVCSrcV2B16() const { 463 return isVCSrcB16(); 464 } 465 466 bool isVCSrcF32() const { 467 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 468 } 469 470 bool isVCSrcF64() const { 471 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 472 } 473 474 bool isVCSrcF16() const { 475 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 476 } 477 478 bool isVCSrcV2F16() const { 479 return isVCSrcF16(); 480 } 481 482 bool isVSrcB32() const { 483 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 484 } 485 486 bool isVSrcB64() const { 487 return isVCSrcF64() || isLiteralImm(MVT::i64); 488 } 489 490 bool isVSrcB16() const { 491 return isVCSrcF16() || isLiteralImm(MVT::i16); 492 } 493 494 bool isVSrcV2B16() const { 495 return isVSrcB16() || isLiteralImm(MVT::v2i16); 496 } 497 498 bool isVSrcF32() const { 499 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 500 } 501 502 bool isVSrcF64() const { 503 return isVCSrcF64() || isLiteralImm(MVT::f64); 504 } 505 506 bool isVSrcF16() const { 507 return isVCSrcF16() || isLiteralImm(MVT::f16); 508 } 509 510 bool isVSrcV2F16() const { 511 return isVSrcF16() || isLiteralImm(MVT::v2f16); 512 } 513 514 bool isVISrcB32() const { 515 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 516 } 517 518 bool isVISrcB16() const { 519 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 520 } 521 522 bool isVISrcV2B16() const { 523 return isVISrcB16(); 524 } 525 526 bool isVISrcF32() const { 527 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 528 } 529 530 bool isVISrcF16() const { 531 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 532 } 533 534 bool isVISrcV2F16() const { 535 return isVISrcF16() || isVISrcB32(); 536 } 537 538 bool isAISrcB32() const { 539 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 540 } 541 542 bool isAISrcB16() const { 543 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 544 } 545 546 bool isAISrcV2B16() const { 547 return isAISrcB16(); 548 } 549 550 bool isAISrcF32() const { 551 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 552 } 553 554 bool isAISrcF16() const { 555 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 556 } 557 558 bool isAISrcV2F16() const { 559 return isAISrcF16() || isAISrcB32(); 560 } 561 562 bool isAISrc_128B32() const { 563 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 564 } 565 566 bool isAISrc_128B16() const { 567 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 568 } 569 570 bool 
isAISrc_128V2B16() const { 571 return isAISrc_128B16(); 572 } 573 574 bool isAISrc_128F32() const { 575 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 576 } 577 578 bool isAISrc_128F16() const { 579 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 580 } 581 582 bool isAISrc_128V2F16() const { 583 return isAISrc_128F16() || isAISrc_128B32(); 584 } 585 586 bool isAISrc_512B32() const { 587 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 588 } 589 590 bool isAISrc_512B16() const { 591 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 592 } 593 594 bool isAISrc_512V2B16() const { 595 return isAISrc_512B16(); 596 } 597 598 bool isAISrc_512F32() const { 599 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 600 } 601 602 bool isAISrc_512F16() const { 603 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 604 } 605 606 bool isAISrc_512V2F16() const { 607 return isAISrc_512F16() || isAISrc_512B32(); 608 } 609 610 bool isAISrc_1024B32() const { 611 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 612 } 613 614 bool isAISrc_1024B16() const { 615 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 616 } 617 618 bool isAISrc_1024V2B16() const { 619 return isAISrc_1024B16(); 620 } 621 622 bool isAISrc_1024F32() const { 623 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 624 } 625 626 bool isAISrc_1024F16() const { 627 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 628 } 629 630 bool isAISrc_1024V2F16() const { 631 return isAISrc_1024F16() || isAISrc_1024B32(); 632 } 633 634 bool isKImmFP32() const { 635 return isLiteralImm(MVT::f32); 636 } 637 638 bool isKImmFP16() const { 639 return isLiteralImm(MVT::f16); 640 } 641 642 bool isMem() const override { 643 return false; 644 } 645 646 bool isExpr() const { 647 return Kind == Expression; 648 } 649 650 bool isSoppBrTarget() const { 651 return isExpr() || isImm(); 652 } 653 654 bool isSWaitCnt() const; 655 bool isHwreg() const; 656 bool isSendMsg() const; 657 bool isSwizzle() const; 658 bool isSMRDOffset8() const; 659 bool isSMRDOffset20() const; 660 bool isSMRDLiteralOffset() const; 661 bool isDPP8() const; 662 bool isDPPCtrl() const; 663 bool isBLGP() const; 664 bool isCBSZ() const; 665 bool isABID() const; 666 bool isGPRIdxMode() const; 667 bool isS16Imm() const; 668 bool isU16Imm() const; 669 bool isEndpgm() const; 670 671 StringRef getExpressionAsToken() const { 672 assert(isExpr()); 673 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 674 return S->getSymbol().getName(); 675 } 676 677 StringRef getToken() const { 678 assert(isToken()); 679 680 if (Kind == Expression) 681 return getExpressionAsToken(); 682 683 return StringRef(Tok.Data, Tok.Length); 684 } 685 686 int64_t getImm() const { 687 assert(isImm()); 688 return Imm.Val; 689 } 690 691 ImmTy getImmTy() const { 692 assert(isImm()); 693 return Imm.Type; 694 } 695 696 unsigned getReg() const override { 697 assert(isRegKind()); 698 return Reg.RegNo; 699 } 700 701 SMLoc getStartLoc() const override { 702 return StartLoc; 703 } 704 705 SMLoc getEndLoc() const override { 706 return EndLoc; 707 } 708 709 SMRange getLocRange() const { 710 return SMRange(StartLoc, EndLoc); 711 } 712 713 Modifiers getModifiers() const { 714 assert(isRegKind() || isImmTy(ImmTyNone)); 715 return isRegKind() ? 
Reg.Mods : Imm.Mods; 716 } 717 718 void setModifiers(Modifiers Mods) { 719 assert(isRegKind() || isImmTy(ImmTyNone)); 720 if (isRegKind()) 721 Reg.Mods = Mods; 722 else 723 Imm.Mods = Mods; 724 } 725 726 bool hasModifiers() const { 727 return getModifiers().hasModifiers(); 728 } 729 730 bool hasFPModifiers() const { 731 return getModifiers().hasFPModifiers(); 732 } 733 734 bool hasIntModifiers() const { 735 return getModifiers().hasIntModifiers(); 736 } 737 738 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 739 740 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 741 742 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 743 744 template <unsigned Bitwidth> 745 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 746 747 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 748 addKImmFPOperands<16>(Inst, N); 749 } 750 751 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 752 addKImmFPOperands<32>(Inst, N); 753 } 754 755 void addRegOperands(MCInst &Inst, unsigned N) const; 756 757 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 758 addRegOperands(Inst, N); 759 } 760 761 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 762 if (isRegKind()) 763 addRegOperands(Inst, N); 764 else if (isExpr()) 765 Inst.addOperand(MCOperand::createExpr(Expr)); 766 else 767 addImmOperands(Inst, N); 768 } 769 770 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 771 Modifiers Mods = getModifiers(); 772 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 773 if (isRegKind()) { 774 addRegOperands(Inst, N); 775 } else { 776 addImmOperands(Inst, N, false); 777 } 778 } 779 780 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 781 assert(!hasIntModifiers()); 782 addRegOrImmWithInputModsOperands(Inst, N); 783 } 784 785 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 786 assert(!hasFPModifiers()); 787 addRegOrImmWithInputModsOperands(Inst, N); 788 } 789 790 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 791 Modifiers Mods = getModifiers(); 792 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 793 assert(isRegKind()); 794 addRegOperands(Inst, N); 795 } 796 797 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 798 assert(!hasIntModifiers()); 799 addRegWithInputModsOperands(Inst, N); 800 } 801 802 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 803 assert(!hasFPModifiers()); 804 addRegWithInputModsOperands(Inst, N); 805 } 806 807 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 808 if (isImm()) 809 addImmOperands(Inst, N); 810 else { 811 assert(isExpr()); 812 Inst.addOperand(MCOperand::createExpr(Expr)); 813 } 814 } 815 816 static void printImmTy(raw_ostream& OS, ImmTy Type) { 817 switch (Type) { 818 case ImmTyNone: OS << "None"; break; 819 case ImmTyGDS: OS << "GDS"; break; 820 case ImmTyLDS: OS << "LDS"; break; 821 case ImmTyOffen: OS << "Offen"; break; 822 case ImmTyIdxen: OS << "Idxen"; break; 823 case ImmTyAddr64: OS << "Addr64"; break; 824 case ImmTyOffset: OS << "Offset"; break; 825 case ImmTyInstOffset: OS << "InstOffset"; break; 826 case ImmTyOffset0: OS << "Offset0"; break; 827 case ImmTyOffset1: OS << "Offset1"; break; 828 case ImmTyDLC: OS << "DLC"; break; 829 case ImmTyGLC: OS << "GLC"; break; 830 case ImmTySLC: OS << "SLC"; break; 831 case ImmTySWZ: OS << "SWZ"; break; 832 case ImmTyTFE: OS << "TFE"; break; 833 case 
ImmTyD16: OS << "D16"; break; 834 case ImmTyFORMAT: OS << "FORMAT"; break; 835 case ImmTyClampSI: OS << "ClampSI"; break; 836 case ImmTyOModSI: OS << "OModSI"; break; 837 case ImmTyDPP8: OS << "DPP8"; break; 838 case ImmTyDppCtrl: OS << "DppCtrl"; break; 839 case ImmTyDppRowMask: OS << "DppRowMask"; break; 840 case ImmTyDppBankMask: OS << "DppBankMask"; break; 841 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 842 case ImmTyDppFi: OS << "FI"; break; 843 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 844 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 845 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 846 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 847 case ImmTyDMask: OS << "DMask"; break; 848 case ImmTyDim: OS << "Dim"; break; 849 case ImmTyUNorm: OS << "UNorm"; break; 850 case ImmTyDA: OS << "DA"; break; 851 case ImmTyR128A16: OS << "R128A16"; break; 852 case ImmTyA16: OS << "A16"; break; 853 case ImmTyLWE: OS << "LWE"; break; 854 case ImmTyOff: OS << "Off"; break; 855 case ImmTyExpTgt: OS << "ExpTgt"; break; 856 case ImmTyExpCompr: OS << "ExpCompr"; break; 857 case ImmTyExpVM: OS << "ExpVM"; break; 858 case ImmTyHwreg: OS << "Hwreg"; break; 859 case ImmTySendMsg: OS << "SendMsg"; break; 860 case ImmTyInterpSlot: OS << "InterpSlot"; break; 861 case ImmTyInterpAttr: OS << "InterpAttr"; break; 862 case ImmTyAttrChan: OS << "AttrChan"; break; 863 case ImmTyOpSel: OS << "OpSel"; break; 864 case ImmTyOpSelHi: OS << "OpSelHi"; break; 865 case ImmTyNegLo: OS << "NegLo"; break; 866 case ImmTyNegHi: OS << "NegHi"; break; 867 case ImmTySwizzle: OS << "Swizzle"; break; 868 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 869 case ImmTyHigh: OS << "High"; break; 870 case ImmTyBLGP: OS << "BLGP"; break; 871 case ImmTyCBSZ: OS << "CBSZ"; break; 872 case ImmTyABID: OS << "ABID"; break; 873 case ImmTyEndpgm: OS << "Endpgm"; break; 874 } 875 } 876 877 void print(raw_ostream &OS) const override { 878 switch (Kind) { 879 case Register: 880 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 881 break; 882 case Immediate: 883 OS << '<' << getImm(); 884 if (getImmTy() != ImmTyNone) { 885 OS << " type: "; printImmTy(OS, getImmTy()); 886 } 887 OS << " mods: " << Imm.Mods << '>'; 888 break; 889 case Token: 890 OS << '\'' << getToken() << '\''; 891 break; 892 case Expression: 893 OS << "<expr " << *Expr << '>'; 894 break; 895 } 896 } 897 898 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 899 int64_t Val, SMLoc Loc, 900 ImmTy Type = ImmTyNone, 901 bool IsFPImm = false) { 902 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 903 Op->Imm.Val = Val; 904 Op->Imm.IsFPImm = IsFPImm; 905 Op->Imm.Type = Type; 906 Op->Imm.Mods = Modifiers(); 907 Op->StartLoc = Loc; 908 Op->EndLoc = Loc; 909 return Op; 910 } 911 912 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 913 StringRef Str, SMLoc Loc, 914 bool HasExplicitEncodingSize = true) { 915 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 916 Res->Tok.Data = Str.data(); 917 Res->Tok.Length = Str.size(); 918 Res->StartLoc = Loc; 919 Res->EndLoc = Loc; 920 return Res; 921 } 922 923 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 924 unsigned RegNo, SMLoc S, 925 SMLoc E) { 926 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 927 Op->Reg.RegNo = RegNo; 928 Op->Reg.Mods = Modifiers(); 929 Op->StartLoc = S; 930 Op->EndLoc = E; 931 return Op; 932 } 933 934 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 935 
const class MCExpr *Expr, SMLoc S) { 936 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 937 Op->Expr = Expr; 938 Op->StartLoc = S; 939 Op->EndLoc = S; 940 return Op; 941 } 942 }; 943 944 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 945 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 946 return OS; 947 } 948 949 //===----------------------------------------------------------------------===// 950 // AsmParser 951 //===----------------------------------------------------------------------===// 952 953 // Holds info related to the current kernel, e.g. count of SGPRs used. 954 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 955 // .amdgpu_hsa_kernel or at EOF. 956 class KernelScopeInfo { 957 int SgprIndexUnusedMin = -1; 958 int VgprIndexUnusedMin = -1; 959 MCContext *Ctx = nullptr; 960 961 void usesSgprAt(int i) { 962 if (i >= SgprIndexUnusedMin) { 963 SgprIndexUnusedMin = ++i; 964 if (Ctx) { 965 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 966 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 967 } 968 } 969 } 970 971 void usesVgprAt(int i) { 972 if (i >= VgprIndexUnusedMin) { 973 VgprIndexUnusedMin = ++i; 974 if (Ctx) { 975 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 976 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 977 } 978 } 979 } 980 981 public: 982 KernelScopeInfo() = default; 983 984 void initialize(MCContext &Context) { 985 Ctx = &Context; 986 usesSgprAt(SgprIndexUnusedMin = -1); 987 usesVgprAt(VgprIndexUnusedMin = -1); 988 } 989 990 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 991 switch (RegKind) { 992 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 993 case IS_AGPR: // fall through 994 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 995 default: break; 996 } 997 } 998 }; 999 1000 class AMDGPUAsmParser : public MCTargetAsmParser { 1001 MCAsmParser &Parser; 1002 1003 // Number of extra operands parsed after the first optional operand. 1004 // This may be necessary to skip hardcoded mandatory operands. 1005 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1006 1007 unsigned ForcedEncodingSize = 0; 1008 bool ForcedDPP = false; 1009 bool ForcedSDWA = false; 1010 KernelScopeInfo KernelScope; 1011 1012 /// @name Auto-generated Match Functions 1013 /// { 1014 1015 #define GET_ASSEMBLER_HEADER 1016 #include "AMDGPUGenAsmMatcher.inc" 1017 1018 /// } 1019 1020 private: 1021 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1022 bool OutOfRangeError(SMRange Range); 1023 /// Calculate VGPR/SGPR blocks required for given target, reserved 1024 /// registers, and user-specified NextFreeXGPR values. 1025 /// 1026 /// \param Features [in] Target features, used for bug corrections. 1027 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1028 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1029 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1030 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1031 /// descriptor field, if valid. 1032 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1033 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1034 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1035 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 
1036 /// \param VGPRBlocks [out] Result VGPR block count. 1037 /// \param SGPRBlocks [out] Result SGPR block count. 1038 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1039 bool FlatScrUsed, bool XNACKUsed, 1040 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1041 SMRange VGPRRange, unsigned NextFreeSGPR, 1042 SMRange SGPRRange, unsigned &VGPRBlocks, 1043 unsigned &SGPRBlocks); 1044 bool ParseDirectiveAMDGCNTarget(); 1045 bool ParseDirectiveAMDHSAKernel(); 1046 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1047 bool ParseDirectiveHSACodeObjectVersion(); 1048 bool ParseDirectiveHSACodeObjectISA(); 1049 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1050 bool ParseDirectiveAMDKernelCodeT(); 1051 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1052 bool ParseDirectiveAMDGPUHsaKernel(); 1053 1054 bool ParseDirectiveISAVersion(); 1055 bool ParseDirectiveHSAMetadata(); 1056 bool ParseDirectivePALMetadataBegin(); 1057 bool ParseDirectivePALMetadata(); 1058 bool ParseDirectiveAMDGPULDS(); 1059 1060 /// Common code to parse out a block of text (typically YAML) between start and 1061 /// end directives. 1062 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1063 const char *AssemblerDirectiveEnd, 1064 std::string &CollectString); 1065 1066 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1067 RegisterKind RegKind, unsigned Reg1); 1068 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1069 unsigned &RegNum, unsigned &RegWidth, 1070 bool RestoreOnFailure = false); 1071 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1072 unsigned &RegNum, unsigned &RegWidth, 1073 SmallVectorImpl<AsmToken> &Tokens); 1074 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1075 unsigned &RegWidth, 1076 SmallVectorImpl<AsmToken> &Tokens); 1077 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1078 unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1082 bool ParseRegRange(unsigned& Num, unsigned& Width); 1083 unsigned getRegularReg(RegisterKind RegKind, 1084 unsigned RegNum, 1085 unsigned RegWidth); 1086 1087 bool isRegister(); 1088 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1089 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1090 void initializeGprCountSymbol(RegisterKind RegKind); 1091 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1092 unsigned RegWidth); 1093 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1094 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1095 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1096 bool IsGdsHardcoded); 1097 1098 public: 1099 enum AMDGPUMatchResultTy { 1100 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1101 }; 1102 enum OperandMode { 1103 OperandMode_Default, 1104 OperandMode_NSA, 1105 }; 1106 1107 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1108 1109 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1110 const MCInstrInfo &MII, 1111 const MCTargetOptions &Options) 1112 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1113 MCAsmParserExtension::Initialize(Parser); 1114 1115 if (getFeatureBits().none()) { 1116 // Set default features. 
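      // No subtarget features were supplied, so fall back to the
      // Southern Islands (gfx6) baseline enabled just below.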
1117 copySTI().ToggleFeature("southern-islands"); 1118 } 1119 1120 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1121 1122 { 1123 // TODO: make those pre-defined variables read-only. 1124 // Currently there is none suitable machinery in the core llvm-mc for this. 1125 // MCSymbol::isRedefinable is intended for another purpose, and 1126 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1127 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1128 MCContext &Ctx = getContext(); 1129 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1130 MCSymbol *Sym = 1131 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1132 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1133 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1134 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1135 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1136 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1137 } else { 1138 MCSymbol *Sym = 1139 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1140 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1141 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1142 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1143 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1144 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1145 } 1146 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1147 initializeGprCountSymbol(IS_VGPR); 1148 initializeGprCountSymbol(IS_SGPR); 1149 } else 1150 KernelScope.initialize(getContext()); 1151 } 1152 } 1153 1154 bool hasXNACK() const { 1155 return AMDGPU::hasXNACK(getSTI()); 1156 } 1157 1158 bool hasMIMG_R128() const { 1159 return AMDGPU::hasMIMG_R128(getSTI()); 1160 } 1161 1162 bool hasPackedD16() const { 1163 return AMDGPU::hasPackedD16(getSTI()); 1164 } 1165 1166 bool hasGFX10A16() const { 1167 return AMDGPU::hasGFX10A16(getSTI()); 1168 } 1169 1170 bool isSI() const { 1171 return AMDGPU::isSI(getSTI()); 1172 } 1173 1174 bool isCI() const { 1175 return AMDGPU::isCI(getSTI()); 1176 } 1177 1178 bool isVI() const { 1179 return AMDGPU::isVI(getSTI()); 1180 } 1181 1182 bool isGFX9() const { 1183 return AMDGPU::isGFX9(getSTI()); 1184 } 1185 1186 bool isGFX10() const { 1187 return AMDGPU::isGFX10(getSTI()); 1188 } 1189 1190 bool hasInv2PiInlineImm() const { 1191 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1192 } 1193 1194 bool hasFlatOffsets() const { 1195 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1196 } 1197 1198 bool hasSGPR102_SGPR103() const { 1199 return !isVI() && !isGFX9(); 1200 } 1201 1202 bool hasSGPR104_SGPR105() const { 1203 return isGFX10(); 1204 } 1205 1206 bool hasIntClamp() const { 1207 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1208 } 1209 1210 AMDGPUTargetStreamer &getTargetStreamer() { 1211 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1212 return static_cast<AMDGPUTargetStreamer &>(TS); 1213 } 1214 1215 const MCRegisterInfo *getMRI() const { 1216 // We need this const_cast because for some reason getContext() is not const 1217 // in MCAsmParser. 
1218 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1219 } 1220 1221 const MCInstrInfo *getMII() const { 1222 return &MII; 1223 } 1224 1225 const FeatureBitset &getFeatureBits() const { 1226 return getSTI().getFeatureBits(); 1227 } 1228 1229 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1230 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1231 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1232 1233 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1234 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1235 bool isForcedDPP() const { return ForcedDPP; } 1236 bool isForcedSDWA() const { return ForcedSDWA; } 1237 ArrayRef<unsigned> getMatchedVariants() const; 1238 1239 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1240 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1241 bool RestoreOnFailure); 1242 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1243 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1244 SMLoc &EndLoc) override; 1245 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1246 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1247 unsigned Kind) override; 1248 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1249 OperandVector &Operands, MCStreamer &Out, 1250 uint64_t &ErrorInfo, 1251 bool MatchingInlineAsm) override; 1252 bool ParseDirective(AsmToken DirectiveID) override; 1253 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1254 OperandMode Mode = OperandMode_Default); 1255 StringRef parseMnemonicSuffix(StringRef Name); 1256 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1257 SMLoc NameLoc, OperandVector &Operands) override; 1258 //bool ProcessInstruction(MCInst &Inst); 1259 1260 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1261 1262 OperandMatchResultTy 1263 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1264 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1265 bool (*ConvertResult)(int64_t &) = nullptr); 1266 1267 OperandMatchResultTy 1268 parseOperandArrayWithPrefix(const char *Prefix, 1269 OperandVector &Operands, 1270 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1271 bool (*ConvertResult)(int64_t&) = nullptr); 1272 1273 OperandMatchResultTy 1274 parseNamedBit(const char *Name, OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1276 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1277 StringRef &Value); 1278 1279 bool isModifier(); 1280 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1281 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1282 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1283 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1284 bool parseSP3NegModifier(); 1285 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1286 OperandMatchResultTy parseReg(OperandVector &Operands); 1287 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1288 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1289 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1290 
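  // Illustrative source-modifier syntax handled by the helpers declared
  // above and below (a sketch, not an exhaustive grammar):
  //   v_add_f32 v0, -v1, |v2|   // FP neg and abs modifiers
  //   sext(v1)                  // integer sign-extension modifier (SDWA)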
OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1291 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1292 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1293 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1294 1295 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1296 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1297 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1298 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1299 1300 bool parseCnt(int64_t &IntVal); 1301 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1302 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1303 1304 private: 1305 struct OperandInfoTy { 1306 int64_t Id; 1307 bool IsSymbolic = false; 1308 bool IsDefined = false; 1309 1310 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1311 }; 1312 1313 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1314 bool validateSendMsg(const OperandInfoTy &Msg, 1315 const OperandInfoTy &Op, 1316 const OperandInfoTy &Stream, 1317 const SMLoc Loc); 1318 1319 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1320 bool validateHwreg(const OperandInfoTy &HwReg, 1321 const int64_t Offset, 1322 const int64_t Width, 1323 const SMLoc Loc); 1324 1325 void errorExpTgt(); 1326 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1327 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1328 1329 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1330 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1331 bool validateSOPLiteral(const MCInst &Inst) const; 1332 bool validateConstantBusLimitations(const MCInst &Inst); 1333 bool validateEarlyClobberLimitations(const MCInst &Inst); 1334 bool validateIntClampSupported(const MCInst &Inst); 1335 bool validateMIMGAtomicDMask(const MCInst &Inst); 1336 bool validateMIMGGatherDMask(const MCInst &Inst); 1337 bool validateMovrels(const MCInst &Inst); 1338 bool validateMIMGDataSize(const MCInst &Inst); 1339 bool validateMIMGAddrSize(const MCInst &Inst); 1340 bool validateMIMGD16(const MCInst &Inst); 1341 bool validateMIMGDim(const MCInst &Inst); 1342 bool validateLdsDirect(const MCInst &Inst); 1343 bool validateOpSel(const MCInst &Inst); 1344 bool validateVccOperand(unsigned Reg) const; 1345 bool validateVOP3Literal(const MCInst &Inst) const; 1346 unsigned getConstantBusLimit(unsigned Opcode) const; 1347 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1348 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1349 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1350 1351 bool isId(const StringRef Id) const; 1352 bool isId(const AsmToken &Token, const StringRef Id) const; 1353 bool isToken(const AsmToken::TokenKind Kind) const; 1354 bool trySkipId(const StringRef Id); 1355 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1356 bool trySkipToken(const AsmToken::TokenKind Kind); 1357 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1358 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1359 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1360 AsmToken::TokenKind getTokenKind() const; 1361 bool parseExpr(int64_t &Imm); 1362 bool parseExpr(OperandVector &Operands); 1363 StringRef getTokenStr() const; 
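  // Example operand syntax accepted by the custom parsers declared in this
  // class (illustrative values only):
  //   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  //   s_getreg_b32 s0, hwreg(HW_REG_MODE, 0, 8)
  //   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)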
1364 AsmToken peekToken(); 1365 AsmToken getToken() const; 1366 SMLoc getLoc() const; 1367 void lex(); 1368 1369 public: 1370 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1371 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1372 1373 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1374 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1375 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1376 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1377 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1378 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1379 1380 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1381 const unsigned MinVal, 1382 const unsigned MaxVal, 1383 const StringRef ErrMsg); 1384 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1385 bool parseSwizzleOffset(int64_t &Imm); 1386 bool parseSwizzleMacro(int64_t &Imm); 1387 bool parseSwizzleQuadPerm(int64_t &Imm); 1388 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1389 bool parseSwizzleBroadcast(int64_t &Imm); 1390 bool parseSwizzleSwap(int64_t &Imm); 1391 bool parseSwizzleReverse(int64_t &Imm); 1392 1393 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1394 int64_t parseGPRIdxMacro(); 1395 1396 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1397 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1398 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1399 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1400 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1401 1402 AMDGPUOperand::Ptr defaultDLC() const; 1403 AMDGPUOperand::Ptr defaultGLC() const; 1404 AMDGPUOperand::Ptr defaultSLC() const; 1405 1406 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1407 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1408 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1409 AMDGPUOperand::Ptr defaultFlatOffset() const; 1410 1411 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1412 1413 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1414 OptionalImmIndexMap &OptionalIdx); 1415 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1416 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1417 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1418 1419 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1420 1421 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1422 bool IsAtomic = false); 1423 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1424 1425 OperandMatchResultTy parseDim(OperandVector &Operands); 1426 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1427 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1428 AMDGPUOperand::Ptr defaultRowMask() const; 1429 AMDGPUOperand::Ptr defaultBankMask() const; 1430 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1431 AMDGPUOperand::Ptr defaultFI() const; 1432 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1433 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1434 1435 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1436 AMDGPUOperand::ImmTy Type); 1437 OperandMatchResultTy 
parseSDWADstUnused(OperandVector &Operands); 1438 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1439 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1440 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1441 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1442 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1443 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1444 uint64_t BasicInstType, 1445 bool SkipDstVcc = false, 1446 bool SkipSrcVcc = false); 1447 1448 AMDGPUOperand::Ptr defaultBLGP() const; 1449 AMDGPUOperand::Ptr defaultCBSZ() const; 1450 AMDGPUOperand::Ptr defaultABID() const; 1451 1452 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1453 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1454 }; 1455 1456 struct OptionalOperand { 1457 const char *Name; 1458 AMDGPUOperand::ImmTy Type; 1459 bool IsBit; 1460 bool (*ConvertResult)(int64_t&); 1461 }; 1462 1463 } // end anonymous namespace 1464 1465 // May be called with integer type with equivalent bitwidth. 1466 static const fltSemantics *getFltSemantics(unsigned Size) { 1467 switch (Size) { 1468 case 4: 1469 return &APFloat::IEEEsingle(); 1470 case 8: 1471 return &APFloat::IEEEdouble(); 1472 case 2: 1473 return &APFloat::IEEEhalf(); 1474 default: 1475 llvm_unreachable("unsupported fp type"); 1476 } 1477 } 1478 1479 static const fltSemantics *getFltSemantics(MVT VT) { 1480 return getFltSemantics(VT.getSizeInBits() / 8); 1481 } 1482 1483 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1484 switch (OperandType) { 1485 case AMDGPU::OPERAND_REG_IMM_INT32: 1486 case AMDGPU::OPERAND_REG_IMM_FP32: 1487 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1488 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1489 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1490 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1491 return &APFloat::IEEEsingle(); 1492 case AMDGPU::OPERAND_REG_IMM_INT64: 1493 case AMDGPU::OPERAND_REG_IMM_FP64: 1494 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1495 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1496 return &APFloat::IEEEdouble(); 1497 case AMDGPU::OPERAND_REG_IMM_INT16: 1498 case AMDGPU::OPERAND_REG_IMM_FP16: 1499 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1500 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1501 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1502 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1503 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1504 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1505 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1506 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1507 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1508 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1509 return &APFloat::IEEEhalf(); 1510 default: 1511 llvm_unreachable("unsupported fp type"); 1512 } 1513 } 1514 1515 //===----------------------------------------------------------------------===// 1516 // Operand 1517 //===----------------------------------------------------------------------===// 1518 1519 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1520 bool Lost; 1521 1522 // Convert literal to single precision 1523 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1524 APFloat::rmNearestTiesToEven, 1525 &Lost); 1526 // We allow precision lost but not overflow or underflow 1527 if (Status != APFloat::opOK && 1528 Lost && 1529 ((Status & APFloat::opOverflow) != 0 || 1530 (Status & APFloat::opUnderflow) != 0)) { 1531 return false; 1532 } 1533 1534 return true; 1535 } 1536 1537 static bool isSafeTruncation(int64_t Val, unsigned 
Size) { 1538 return isUIntN(Size, Val) || isIntN(Size, Val); 1539 } 1540 1541 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1542 1543 // This is a hack to enable named inline values like 1544 // shared_base with both 32-bit and 64-bit operands. 1545 // Note that these values are defined as 1546 // 32-bit operands only. 1547 if (isInlineValue()) { 1548 return true; 1549 } 1550 1551 if (!isImmTy(ImmTyNone)) { 1552 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1553 return false; 1554 } 1555 // TODO: We should avoid using host float here. It would be better to 1556 // check the float bit values which is what a few other places do. 1557 // We've had bot failures before due to weird NaN support on mips hosts. 1558 1559 APInt Literal(64, Imm.Val); 1560 1561 if (Imm.IsFPImm) { // We got fp literal token 1562 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1563 return AMDGPU::isInlinableLiteral64(Imm.Val, 1564 AsmParser->hasInv2PiInlineImm()); 1565 } 1566 1567 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1568 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1569 return false; 1570 1571 if (type.getScalarSizeInBits() == 16) { 1572 return AMDGPU::isInlinableLiteral16( 1573 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1574 AsmParser->hasInv2PiInlineImm()); 1575 } 1576 1577 // Check if single precision literal is inlinable 1578 return AMDGPU::isInlinableLiteral32( 1579 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1580 AsmParser->hasInv2PiInlineImm()); 1581 } 1582 1583 // We got int literal token. 1584 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1585 return AMDGPU::isInlinableLiteral64(Imm.Val, 1586 AsmParser->hasInv2PiInlineImm()); 1587 } 1588 1589 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1590 return false; 1591 } 1592 1593 if (type.getScalarSizeInBits() == 16) { 1594 return AMDGPU::isInlinableLiteral16( 1595 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1596 AsmParser->hasInv2PiInlineImm()); 1597 } 1598 1599 return AMDGPU::isInlinableLiteral32( 1600 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1601 AsmParser->hasInv2PiInlineImm()); 1602 } 1603 1604 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1605 // Check that this immediate can be added as literal 1606 if (!isImmTy(ImmTyNone)) { 1607 return false; 1608 } 1609 1610 if (!Imm.IsFPImm) { 1611 // We got int literal token. 1612 1613 if (type == MVT::f64 && hasFPModifiers()) { 1614 // Cannot apply fp modifiers to int literals preserving the same semantics 1615 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1616 // disable these cases. 1617 return false; 1618 } 1619 1620 unsigned Size = type.getSizeInBits(); 1621 if (Size == 64) 1622 Size = 32; 1623 1624 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1625 // types. 1626 return isSafeTruncation(Imm.Val, Size); 1627 } 1628 1629 // We got fp literal token 1630 if (type == MVT::f64) { // Expected 64-bit fp operand 1631 // We would set low 64-bits of literal to zeroes but we accept this literals 1632 return true; 1633 } 1634 1635 if (type == MVT::i64) { // Expected 64-bit int operand 1636 // We don't allow fp literals in 64-bit integer instructions. It is 1637 // unclear how we should encode them. 
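    // (For example, a non-inline fp literal such as 1.5 given to a 64-bit
    //  integer instruction fails to match here instead of being silently
    //  bit-cast or truncated.)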
1638 return false; 1639 } 1640 1641 // We allow fp literals with f16x2 operands assuming that the specified 1642 // literal goes into the lower half and the upper half is zero. We also 1643 // require that the literal may be losslesly converted to f16. 1644 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1645 (type == MVT::v2i16)? MVT::i16 : type; 1646 1647 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1648 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1649 } 1650 1651 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1652 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1653 } 1654 1655 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1656 if (AsmParser->isVI()) 1657 return isVReg32(); 1658 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1659 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1660 else 1661 return false; 1662 } 1663 1664 bool AMDGPUOperand::isSDWAFP16Operand() const { 1665 return isSDWAOperand(MVT::f16); 1666 } 1667 1668 bool AMDGPUOperand::isSDWAFP32Operand() const { 1669 return isSDWAOperand(MVT::f32); 1670 } 1671 1672 bool AMDGPUOperand::isSDWAInt16Operand() const { 1673 return isSDWAOperand(MVT::i16); 1674 } 1675 1676 bool AMDGPUOperand::isSDWAInt32Operand() const { 1677 return isSDWAOperand(MVT::i32); 1678 } 1679 1680 bool AMDGPUOperand::isBoolReg() const { 1681 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1682 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1683 } 1684 1685 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1686 { 1687 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1688 assert(Size == 2 || Size == 4 || Size == 8); 1689 1690 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1691 1692 if (Imm.Mods.Abs) { 1693 Val &= ~FpSignMask; 1694 } 1695 if (Imm.Mods.Neg) { 1696 Val ^= FpSignMask; 1697 } 1698 1699 return Val; 1700 } 1701 1702 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1703 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1704 Inst.getNumOperands())) { 1705 addLiteralImmOperand(Inst, Imm.Val, 1706 ApplyModifiers & 1707 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1708 } else { 1709 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1710 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1711 } 1712 } 1713 1714 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1715 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1716 auto OpNum = Inst.getNumOperands(); 1717 // Check that this operand accepts literals 1718 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1719 1720 if (ApplyModifiers) { 1721 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1722 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1723 Val = applyInputFPModifiers(Val, Size); 1724 } 1725 1726 APInt Literal(64, Val); 1727 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1728 1729 if (Imm.IsFPImm) { // We got fp literal token 1730 switch (OpTy) { 1731 case AMDGPU::OPERAND_REG_IMM_INT64: 1732 case AMDGPU::OPERAND_REG_IMM_FP64: 1733 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1734 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1735 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1736 AsmParser->hasInv2PiInlineImm())) { 1737 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1738 return; 1739 } 1740 1741 // Non-inlineable 1742 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1743 // For fp operands we check if low 32 bits are zeros 1744 if (Literal.getLoBits(32) != 0) { 1745 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1746 "Can't encode literal as exact 64-bit floating-point operand. " 1747 "Low 32-bits will be set to zero"); 1748 } 1749 1750 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1751 return; 1752 } 1753 1754 // We don't allow fp literals in 64-bit integer instructions. It is 1755 // unclear how we should encode them. This case should be checked earlier 1756 // in predicate methods (isLiteralImm()) 1757 llvm_unreachable("fp literal in 64-bit integer instruction."); 1758 1759 case AMDGPU::OPERAND_REG_IMM_INT32: 1760 case AMDGPU::OPERAND_REG_IMM_FP32: 1761 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1762 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1763 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1764 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1765 case AMDGPU::OPERAND_REG_IMM_INT16: 1766 case AMDGPU::OPERAND_REG_IMM_FP16: 1767 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1768 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1769 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1770 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1771 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1772 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1773 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1774 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1775 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1776 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1777 bool lost; 1778 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1779 // Convert literal to the floating-point format expected by the operand 1780 FPLiteral.convert(*getOpFltSemantics(OpTy), 1781 APFloat::rmNearestTiesToEven, &lost); 1782 // We allow precision loss but not overflow or underflow. This should be 1783 // checked earlier in isLiteralImm() 1784 1785 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1786 Inst.addOperand(MCOperand::createImm(ImmVal)); 1787 return; 1788 } 1789 default: 1790 llvm_unreachable("invalid operand size"); 1791 } 1792 1793 return; 1794 } 1795 1796 // We got int literal token. 1797 // Only sign extend inline immediates.
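// Illustrative example (hypothetical values): for a 16-bit operand, an int
// literal such as -1 passes isSafeTruncation() and isInlinableLiteral16()
// below and is emitted unchanged, while a value such as 0x1234f is not
// inlinable and falls through to the truncated form (0x1234f & 0xffff == 0x234f).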
1798 switch (OpTy) { 1799 case AMDGPU::OPERAND_REG_IMM_INT32: 1800 case AMDGPU::OPERAND_REG_IMM_FP32: 1801 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1802 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1803 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1804 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1805 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1806 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1807 if (isSafeTruncation(Val, 32) && 1808 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1809 AsmParser->hasInv2PiInlineImm())) { 1810 Inst.addOperand(MCOperand::createImm(Val)); 1811 return; 1812 } 1813 1814 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1815 return; 1816 1817 case AMDGPU::OPERAND_REG_IMM_INT64: 1818 case AMDGPU::OPERAND_REG_IMM_FP64: 1819 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1820 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1821 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1822 Inst.addOperand(MCOperand::createImm(Val)); 1823 return; 1824 } 1825 1826 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1827 return; 1828 1829 case AMDGPU::OPERAND_REG_IMM_INT16: 1830 case AMDGPU::OPERAND_REG_IMM_FP16: 1831 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1832 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1833 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1834 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1835 if (isSafeTruncation(Val, 16) && 1836 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1837 AsmParser->hasInv2PiInlineImm())) { 1838 Inst.addOperand(MCOperand::createImm(Val)); 1839 return; 1840 } 1841 1842 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1843 return; 1844 1845 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1846 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1847 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1848 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1849 assert(isSafeTruncation(Val, 16)); 1850 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1851 AsmParser->hasInv2PiInlineImm())); 1852 1853 Inst.addOperand(MCOperand::createImm(Val)); 1854 return; 1855 } 1856 default: 1857 llvm_unreachable("invalid operand size"); 1858 } 1859 } 1860 1861 template <unsigned Bitwidth> 1862 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1863 APInt Literal(64, Imm.Val); 1864 1865 if (!Imm.IsFPImm) { 1866 // We got int literal token. 
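// Illustrative example (hypothetical value): with Bitwidth == 16, an integer
// token such as 0x12345 is encoded as its low 16 bits, i.e. 0x2345.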
1867 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1868 return; 1869 } 1870 1871 bool Lost; 1872 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1873 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1874 APFloat::rmNearestTiesToEven, &Lost); 1875 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1876 } 1877 1878 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1879 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1880 } 1881 1882 static bool isInlineValue(unsigned Reg) { 1883 switch (Reg) { 1884 case AMDGPU::SRC_SHARED_BASE: 1885 case AMDGPU::SRC_SHARED_LIMIT: 1886 case AMDGPU::SRC_PRIVATE_BASE: 1887 case AMDGPU::SRC_PRIVATE_LIMIT: 1888 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1889 return true; 1890 case AMDGPU::SRC_VCCZ: 1891 case AMDGPU::SRC_EXECZ: 1892 case AMDGPU::SRC_SCC: 1893 return true; 1894 case AMDGPU::SGPR_NULL: 1895 return true; 1896 default: 1897 return false; 1898 } 1899 } 1900 1901 bool AMDGPUOperand::isInlineValue() const { 1902 return isRegKind() && ::isInlineValue(getReg()); 1903 } 1904 1905 //===----------------------------------------------------------------------===// 1906 // AsmParser 1907 //===----------------------------------------------------------------------===// 1908 1909 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1910 if (Is == IS_VGPR) { 1911 switch (RegWidth) { 1912 default: return -1; 1913 case 1: return AMDGPU::VGPR_32RegClassID; 1914 case 2: return AMDGPU::VReg_64RegClassID; 1915 case 3: return AMDGPU::VReg_96RegClassID; 1916 case 4: return AMDGPU::VReg_128RegClassID; 1917 case 5: return AMDGPU::VReg_160RegClassID; 1918 case 8: return AMDGPU::VReg_256RegClassID; 1919 case 16: return AMDGPU::VReg_512RegClassID; 1920 case 32: return AMDGPU::VReg_1024RegClassID; 1921 } 1922 } else if (Is == IS_TTMP) { 1923 switch (RegWidth) { 1924 default: return -1; 1925 case 1: return AMDGPU::TTMP_32RegClassID; 1926 case 2: return AMDGPU::TTMP_64RegClassID; 1927 case 4: return AMDGPU::TTMP_128RegClassID; 1928 case 8: return AMDGPU::TTMP_256RegClassID; 1929 case 16: return AMDGPU::TTMP_512RegClassID; 1930 } 1931 } else if (Is == IS_SGPR) { 1932 switch (RegWidth) { 1933 default: return -1; 1934 case 1: return AMDGPU::SGPR_32RegClassID; 1935 case 2: return AMDGPU::SGPR_64RegClassID; 1936 case 4: return AMDGPU::SGPR_128RegClassID; 1937 case 8: return AMDGPU::SGPR_256RegClassID; 1938 case 16: return AMDGPU::SGPR_512RegClassID; 1939 } 1940 } else if (Is == IS_AGPR) { 1941 switch (RegWidth) { 1942 default: return -1; 1943 case 1: return AMDGPU::AGPR_32RegClassID; 1944 case 2: return AMDGPU::AReg_64RegClassID; 1945 case 4: return AMDGPU::AReg_128RegClassID; 1946 case 16: return AMDGPU::AReg_512RegClassID; 1947 case 32: return AMDGPU::AReg_1024RegClassID; 1948 } 1949 } 1950 return -1; 1951 } 1952 1953 static unsigned getSpecialRegForName(StringRef RegName) { 1954 return StringSwitch<unsigned>(RegName) 1955 .Case("exec", AMDGPU::EXEC) 1956 .Case("vcc", AMDGPU::VCC) 1957 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1958 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1959 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1960 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1961 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1962 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1963 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1964 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1965 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1966 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1967 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1968 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1969 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1970 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1971 .Case("m0", AMDGPU::M0) 1972 .Case("vccz", AMDGPU::SRC_VCCZ) 1973 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1974 .Case("execz", AMDGPU::SRC_EXECZ) 1975 .Case("src_execz", AMDGPU::SRC_EXECZ) 1976 .Case("scc", AMDGPU::SRC_SCC) 1977 .Case("src_scc", AMDGPU::SRC_SCC) 1978 .Case("tba", AMDGPU::TBA) 1979 .Case("tma", AMDGPU::TMA) 1980 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1981 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1982 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1983 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1984 .Case("vcc_lo", AMDGPU::VCC_LO) 1985 .Case("vcc_hi", AMDGPU::VCC_HI) 1986 .Case("exec_lo", AMDGPU::EXEC_LO) 1987 .Case("exec_hi", AMDGPU::EXEC_HI) 1988 .Case("tma_lo", AMDGPU::TMA_LO) 1989 .Case("tma_hi", AMDGPU::TMA_HI) 1990 .Case("tba_lo", AMDGPU::TBA_LO) 1991 .Case("tba_hi", AMDGPU::TBA_HI) 1992 .Case("pc", AMDGPU::PC_REG) 1993 .Case("null", AMDGPU::SGPR_NULL) 1994 .Default(AMDGPU::NoRegister); 1995 } 1996 1997 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1998 SMLoc &EndLoc, bool RestoreOnFailure) { 1999 auto R = parseRegister(); 2000 if (!R) return true; 2001 assert(R->isReg()); 2002 RegNo = R->getReg(); 2003 StartLoc = R->getStartLoc(); 2004 EndLoc = R->getEndLoc(); 2005 return false; 2006 } 2007 2008 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2009 SMLoc &EndLoc) { 2010 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2011 } 2012 2013 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2014 SMLoc &StartLoc, 2015 SMLoc &EndLoc) { 2016 bool Result = 2017 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2018 bool PendingErrors = getParser().hasPendingError(); 2019 getParser().clearPendingErrors(); 2020 if (PendingErrors) 2021 return MatchOperand_ParseFail; 2022 if (Result) 2023 return MatchOperand_NoMatch; 2024 return MatchOperand_Success; 2025 } 2026 2027 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2028 RegisterKind RegKind, unsigned Reg1) { 2029 switch (RegKind) { 2030 case IS_SPECIAL: 2031 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2032 Reg = AMDGPU::EXEC; 2033 RegWidth = 2; 2034 return true; 2035 } 2036 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2037 Reg = AMDGPU::FLAT_SCR; 2038 RegWidth = 2; 2039 return true; 2040 } 2041 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2042 Reg = AMDGPU::XNACK_MASK; 2043 RegWidth = 2; 2044 return true; 2045 } 2046 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2047 Reg = AMDGPU::VCC; 2048 RegWidth = 2; 2049 return true; 2050 } 2051 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2052 Reg = AMDGPU::TBA; 2053 RegWidth = 2; 2054 return true; 2055 } 2056 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2057 Reg = AMDGPU::TMA; 2058 RegWidth = 2; 2059 return true; 2060 } 2061 return false; 2062 case IS_VGPR: 2063 case IS_SGPR: 2064 case IS_AGPR: 2065 case IS_TTMP: 2066 if (Reg1 != Reg + RegWidth) { 2067 return false; 2068 } 2069 RegWidth++; 2070 return true; 2071 default: 2072 llvm_unreachable("unexpected register kind"); 2073 } 2074 } 2075 2076 struct RegInfo { 2077 StringLiteral Name; 2078 RegisterKind Kind; 2079 }; 2080 
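// The table below maps register-name prefixes to register kinds. Illustrative
// examples (hypothetical operands): "v7" and "v[0:3]" match the "v" prefix
// (IS_VGPR), "ttmp5" matches "ttmp" (IS_TTMP), and "a2" or "acc2" match an
// AGPR prefix (IS_AGPR).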
2081 static constexpr RegInfo RegularRegisters[] = { 2082 {{"v"}, IS_VGPR}, 2083 {{"s"}, IS_SGPR}, 2084 {{"ttmp"}, IS_TTMP}, 2085 {{"acc"}, IS_AGPR}, 2086 {{"a"}, IS_AGPR}, 2087 }; 2088 2089 static bool isRegularReg(RegisterKind Kind) { 2090 return Kind == IS_VGPR || 2091 Kind == IS_SGPR || 2092 Kind == IS_TTMP || 2093 Kind == IS_AGPR; 2094 } 2095 2096 static const RegInfo* getRegularRegInfo(StringRef Str) { 2097 for (const RegInfo &Reg : RegularRegisters) 2098 if (Str.startswith(Reg.Name)) 2099 return &Reg; 2100 return nullptr; 2101 } 2102 2103 static bool getRegNum(StringRef Str, unsigned& Num) { 2104 return !Str.getAsInteger(10, Num); 2105 } 2106 2107 bool 2108 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2109 const AsmToken &NextToken) const { 2110 2111 // A list of consecutive registers: [s0,s1,s2,s3] 2112 if (Token.is(AsmToken::LBrac)) 2113 return true; 2114 2115 if (!Token.is(AsmToken::Identifier)) 2116 return false; 2117 2118 // A single register like s0 or a range of registers like s[0:1] 2119 2120 StringRef Str = Token.getString(); 2121 const RegInfo *Reg = getRegularRegInfo(Str); 2122 if (Reg) { 2123 StringRef RegName = Reg->Name; 2124 StringRef RegSuffix = Str.substr(RegName.size()); 2125 if (!RegSuffix.empty()) { 2126 unsigned Num; 2127 // A single register with an index: rXX 2128 if (getRegNum(RegSuffix, Num)) 2129 return true; 2130 } else { 2131 // A range of registers: r[XX:YY]. 2132 if (NextToken.is(AsmToken::LBrac)) 2133 return true; 2134 } 2135 } 2136 2137 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2138 } 2139 2140 bool 2141 AMDGPUAsmParser::isRegister() 2142 { 2143 return isRegister(getToken(), peekToken()); 2144 } 2145 2146 unsigned 2147 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2148 unsigned RegNum, 2149 unsigned RegWidth) { 2150 2151 assert(isRegularReg(RegKind)); 2152 2153 unsigned AlignSize = 1; 2154 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2155 // SGPR and TTMP registers must be aligned. 2156 // Max required alignment is 4 dwords. 
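// Illustrative example (hypothetical operands): s[2:3] (RegNum 2, RegWidth 2)
// is accepted because 2 is a multiple of the 2-dword group size, while s[1:2]
// is rejected below since RegNum 1 is not so aligned.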
2157 AlignSize = std::min(RegWidth, 4u); 2158 } 2159 2160 if (RegNum % AlignSize != 0) 2161 return AMDGPU::NoRegister; 2162 2163 unsigned RegIdx = RegNum / AlignSize; 2164 int RCID = getRegClass(RegKind, RegWidth); 2165 if (RCID == -1) 2166 return AMDGPU::NoRegister; 2167 2168 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2169 const MCRegisterClass RC = TRI->getRegClass(RCID); 2170 if (RegIdx >= RC.getNumRegs()) 2171 return AMDGPU::NoRegister; 2172 2173 return RC.getRegister(RegIdx); 2174 } 2175 2176 bool 2177 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2178 int64_t RegLo, RegHi; 2179 if (!trySkipToken(AsmToken::LBrac)) 2180 return false; 2181 2182 if (!parseExpr(RegLo)) 2183 return false; 2184 2185 if (trySkipToken(AsmToken::Colon)) { 2186 if (!parseExpr(RegHi)) 2187 return false; 2188 } else { 2189 RegHi = RegLo; 2190 } 2191 2192 if (!trySkipToken(AsmToken::RBrac)) 2193 return false; 2194 2195 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2196 return false; 2197 2198 Num = static_cast<unsigned>(RegLo); 2199 Width = (RegHi - RegLo) + 1; 2200 return true; 2201 } 2202 2203 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2204 unsigned &RegNum, unsigned &RegWidth, 2205 SmallVectorImpl<AsmToken> &Tokens) { 2206 assert(isToken(AsmToken::Identifier)); 2207 unsigned Reg = getSpecialRegForName(getTokenStr()); 2208 if (Reg) { 2209 RegNum = 0; 2210 RegWidth = 1; 2211 RegKind = IS_SPECIAL; 2212 Tokens.push_back(getToken()); 2213 lex(); // skip register name 2214 } 2215 return Reg; 2216 } 2217 2218 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2219 unsigned &RegNum, unsigned &RegWidth, 2220 SmallVectorImpl<AsmToken> &Tokens) { 2221 assert(isToken(AsmToken::Identifier)); 2222 StringRef RegName = getTokenStr(); 2223 2224 const RegInfo *RI = getRegularRegInfo(RegName); 2225 if (!RI) 2226 return AMDGPU::NoRegister; 2227 Tokens.push_back(getToken()); 2228 lex(); // skip register name 2229 2230 RegKind = RI->Kind; 2231 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2232 if (!RegSuffix.empty()) { 2233 // Single 32-bit register: vXX. 2234 if (!getRegNum(RegSuffix, RegNum)) 2235 return AMDGPU::NoRegister; 2236 RegWidth = 1; 2237 } else { 2238 // Range of registers: v[XX:YY]. ":YY" is optional. 
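// Illustrative example (hypothetical operands): v[8:11] yields RegNum 8 and
// RegWidth 4; a single-element range such as v[8] is also accepted because
// ":YY" may be omitted, in which case RegHi == RegLo and RegWidth is 1.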
2239 if (!ParseRegRange(RegNum, RegWidth)) 2240 return AMDGPU::NoRegister; 2241 } 2242 2243 return getRegularReg(RegKind, RegNum, RegWidth); 2244 } 2245 2246 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2247 unsigned &RegWidth, 2248 SmallVectorImpl<AsmToken> &Tokens) { 2249 unsigned Reg = AMDGPU::NoRegister; 2250 2251 if (!trySkipToken(AsmToken::LBrac)) 2252 return AMDGPU::NoRegister; 2253 2254 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2255 2256 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2257 return AMDGPU::NoRegister; 2258 if (RegWidth != 1) 2259 return AMDGPU::NoRegister; 2260 2261 for (; trySkipToken(AsmToken::Comma); ) { 2262 RegisterKind NextRegKind; 2263 unsigned NextReg, NextRegNum, NextRegWidth; 2264 2265 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth, 2266 Tokens)) 2267 return AMDGPU::NoRegister; 2268 if (NextRegWidth != 1) 2269 return AMDGPU::NoRegister; 2270 if (NextRegKind != RegKind) 2271 return AMDGPU::NoRegister; 2272 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2273 return AMDGPU::NoRegister; 2274 } 2275 2276 if (!trySkipToken(AsmToken::RBrac)) 2277 return AMDGPU::NoRegister; 2278 2279 if (isRegularReg(RegKind)) 2280 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2281 2282 return Reg; 2283 } 2284 2285 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2286 unsigned &RegNum, unsigned &RegWidth, 2287 SmallVectorImpl<AsmToken> &Tokens) { 2288 Reg = AMDGPU::NoRegister; 2289 2290 if (isToken(AsmToken::Identifier)) { 2291 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2292 if (Reg == AMDGPU::NoRegister) 2293 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2294 } else { 2295 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2296 } 2297 2298 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2299 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2300 } 2301 2302 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2303 unsigned &RegNum, unsigned &RegWidth, 2304 bool RestoreOnFailure) { 2305 Reg = AMDGPU::NoRegister; 2306 2307 SmallVector<AsmToken, 1> Tokens; 2308 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2309 if (RestoreOnFailure) { 2310 while (!Tokens.empty()) { 2311 getLexer().UnLex(Tokens.pop_back_val()); 2312 } 2313 } 2314 return true; 2315 } 2316 return false; 2317 } 2318 2319 Optional<StringRef> 2320 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2321 switch (RegKind) { 2322 case IS_VGPR: 2323 return StringRef(".amdgcn.next_free_vgpr"); 2324 case IS_SGPR: 2325 return StringRef(".amdgcn.next_free_sgpr"); 2326 default: 2327 return None; 2328 } 2329 } 2330 2331 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2332 auto SymbolName = getGprCountSymbolName(RegKind); 2333 assert(SymbolName && "initializing invalid register kind"); 2334 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2335 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2336 } 2337 2338 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2339 unsigned DwordRegIndex, 2340 unsigned RegWidth) { 2341 // Symbols are only defined for GCN targets 2342 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2343 return true; 2344 2345 auto SymbolName = getGprCountSymbolName(RegKind); 2346 if (!SymbolName) 2347 return true; 2348 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2349 2350 int64_t NewMax 
= DwordRegIndex + RegWidth - 1; 2351 int64_t OldCount; 2352 2353 if (!Sym->isVariable()) 2354 return !Error(getParser().getTok().getLoc(), 2355 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2356 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2357 return !Error( 2358 getParser().getTok().getLoc(), 2359 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2360 2361 if (OldCount <= NewMax) 2362 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2363 2364 return true; 2365 } 2366 2367 std::unique_ptr<AMDGPUOperand> 2368 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2369 const auto &Tok = Parser.getTok(); 2370 SMLoc StartLoc = Tok.getLoc(); 2371 SMLoc EndLoc = Tok.getEndLoc(); 2372 RegisterKind RegKind; 2373 unsigned Reg, RegNum, RegWidth; 2374 2375 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2376 //FIXME: improve error messages (bug 41303). 2377 Error(StartLoc, "not a valid operand."); 2378 return nullptr; 2379 } 2380 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2381 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2382 return nullptr; 2383 } else 2384 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2385 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2386 } 2387 2388 OperandMatchResultTy 2389 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2390 // TODO: add syntactic sugar for 1/(2*PI) 2391 2392 assert(!isRegister()); 2393 assert(!isModifier()); 2394 2395 const auto& Tok = getToken(); 2396 const auto& NextTok = peekToken(); 2397 bool IsReal = Tok.is(AsmToken::Real); 2398 SMLoc S = getLoc(); 2399 bool Negate = false; 2400 2401 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2402 lex(); 2403 IsReal = true; 2404 Negate = true; 2405 } 2406 2407 if (IsReal) { 2408 // Floating-point expressions are not supported. 2409 // Can only allow floating-point literals with an 2410 // optional sign. 2411 2412 StringRef Num = getTokenStr(); 2413 lex(); 2414 2415 APFloat RealVal(APFloat::IEEEdouble()); 2416 auto roundMode = APFloat::rmNearestTiesToEven; 2417 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2418 return MatchOperand_ParseFail; 2419 } 2420 if (Negate) 2421 RealVal.changeSign(); 2422 2423 Operands.push_back( 2424 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2425 AMDGPUOperand::ImmTyNone, true)); 2426 2427 return MatchOperand_Success; 2428 2429 } else { 2430 int64_t IntVal; 2431 const MCExpr *Expr; 2432 SMLoc S = getLoc(); 2433 2434 if (HasSP3AbsModifier) { 2435 // This is a workaround for handling expressions 2436 // as arguments of SP3 'abs' modifier, for example: 2437 // |1.0| 2438 // |-1| 2439 // |1+x| 2440 // This syntax is not compatible with syntax of standard 2441 // MC expressions (due to the trailing '|'). 
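// Presumably this is because the generic MC expression parser treats '|' as
// the bitwise-OR operator: parseExpression() would try to consume the closing
// bar as an operator, whereas parsePrimaryExpr() stops before it and leaves
// the trailing '|' for the caller to skip.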
2442 SMLoc EndLoc; 2443 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2444 return MatchOperand_ParseFail; 2445 } else { 2446 if (Parser.parseExpression(Expr)) 2447 return MatchOperand_ParseFail; 2448 } 2449 2450 if (Expr->evaluateAsAbsolute(IntVal)) { 2451 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2452 } else { 2453 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2454 } 2455 2456 return MatchOperand_Success; 2457 } 2458 2459 return MatchOperand_NoMatch; 2460 } 2461 2462 OperandMatchResultTy 2463 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2464 if (!isRegister()) 2465 return MatchOperand_NoMatch; 2466 2467 if (auto R = parseRegister()) { 2468 assert(R->isReg()); 2469 Operands.push_back(std::move(R)); 2470 return MatchOperand_Success; 2471 } 2472 return MatchOperand_ParseFail; 2473 } 2474 2475 OperandMatchResultTy 2476 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2477 auto res = parseReg(Operands); 2478 if (res != MatchOperand_NoMatch) { 2479 return res; 2480 } else if (isModifier()) { 2481 return MatchOperand_NoMatch; 2482 } else { 2483 return parseImm(Operands, HasSP3AbsMod); 2484 } 2485 } 2486 2487 bool 2488 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2489 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2490 const auto &str = Token.getString(); 2491 return str == "abs" || str == "neg" || str == "sext"; 2492 } 2493 return false; 2494 } 2495 2496 bool 2497 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2498 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2499 } 2500 2501 bool 2502 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2503 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2504 } 2505 2506 bool 2507 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2508 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2509 } 2510 2511 // Check if this is an operand modifier or an opcode modifier 2512 // which may look like an expression but it is not. We should 2513 // avoid parsing these modifiers as expressions. Currently 2514 // recognized sequences are: 2515 // |...| 2516 // abs(...) 2517 // neg(...) 2518 // sext(...) 2519 // -reg 2520 // -|...| 2521 // -abs(...) 2522 // name:... 2523 // Note that simple opcode modifiers like 'gds' may be parsed as 2524 // expressions; this is a special case. See getExpressionAsToken. 2525 // 2526 bool 2527 AMDGPUAsmParser::isModifier() { 2528 2529 AsmToken Tok = getToken(); 2530 AsmToken NextToken[2]; 2531 peekTokens(NextToken); 2532 2533 return isOperandModifier(Tok, NextToken[0]) || 2534 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2535 isOpcodeModifierWithVal(Tok, NextToken[0]); 2536 } 2537 2538 // Check if the current token is an SP3 'neg' modifier. 2539 // Currently this modifier is allowed in the following context: 2540 // 2541 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2542 // 2. Before an 'abs' modifier: -abs(...) 2543 // 3. Before an SP3 'abs' modifier: -|...| 2544 // 2545 // In all other cases "-" is handled as a part 2546 // of an expression that follows the sign. 
2547 // 2548 // Note: When "-" is followed by an integer literal, 2549 // this is interpreted as integer negation rather 2550 // than a floating-point NEG modifier applied to N. 2551 // Besides being counter-intuitive, such use of a floating-point 2552 // NEG modifier would have resulted in different meaning 2553 // of integer literals used with VOP1/2/C and VOP3, 2554 // for example: 2555 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2556 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2557 // Negative fp literals with preceding "-" are 2558 // handled likewise for uniformity. 2559 // 2560 bool 2561 AMDGPUAsmParser::parseSP3NegModifier() { 2562 2563 AsmToken NextToken[2]; 2564 peekTokens(NextToken); 2565 2566 if (isToken(AsmToken::Minus) && 2567 (isRegister(NextToken[0], NextToken[1]) || 2568 NextToken[0].is(AsmToken::Pipe) || 2569 isId(NextToken[0], "abs"))) { 2570 lex(); 2571 return true; 2572 } 2573 2574 return false; 2575 } 2576 2577 OperandMatchResultTy 2578 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2579 bool AllowImm) { 2580 bool Neg, SP3Neg; 2581 bool Abs, SP3Abs; 2582 SMLoc Loc; 2583 2584 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2585 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2586 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2587 return MatchOperand_ParseFail; 2588 } 2589 2590 SP3Neg = parseSP3NegModifier(); 2591 2592 Loc = getLoc(); 2593 Neg = trySkipId("neg"); 2594 if (Neg && SP3Neg) { 2595 Error(Loc, "expected register or immediate"); 2596 return MatchOperand_ParseFail; 2597 } 2598 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2599 return MatchOperand_ParseFail; 2600 2601 Abs = trySkipId("abs"); 2602 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2603 return MatchOperand_ParseFail; 2604 2605 Loc = getLoc(); 2606 SP3Abs = trySkipToken(AsmToken::Pipe); 2607 if (Abs && SP3Abs) { 2608 Error(Loc, "expected register or immediate"); 2609 return MatchOperand_ParseFail; 2610 } 2611 2612 OperandMatchResultTy Res; 2613 if (AllowImm) { 2614 Res = parseRegOrImm(Operands, SP3Abs); 2615 } else { 2616 Res = parseReg(Operands); 2617 } 2618 if (Res != MatchOperand_Success) { 2619 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2620 } 2621 2622 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2623 return MatchOperand_ParseFail; 2624 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2625 return MatchOperand_ParseFail; 2626 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2627 return MatchOperand_ParseFail; 2628 2629 AMDGPUOperand::Modifiers Mods; 2630 Mods.Abs = Abs || SP3Abs; 2631 Mods.Neg = Neg || SP3Neg; 2632 2633 if (Mods.hasFPModifiers()) { 2634 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2635 if (Op.isExpr()) { 2636 Error(Op.getStartLoc(), "expected an absolute expression"); 2637 return MatchOperand_ParseFail; 2638 } 2639 Op.setModifiers(Mods); 2640 } 2641 return MatchOperand_Success; 2642 } 2643 2644 OperandMatchResultTy 2645 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2646 bool AllowImm) { 2647 bool Sext = trySkipId("sext"); 2648 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2649 return MatchOperand_ParseFail; 2650 2651 OperandMatchResultTy Res; 2652 if (AllowImm) { 2653 Res = parseRegOrImm(Operands); 2654 } else { 2655 Res = parseReg(Operands); 2656 } 2657 if (Res != MatchOperand_Success) { 2658 return Sext? MatchOperand_ParseFail : Res; 2659 } 2660 2661 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2662 return MatchOperand_ParseFail; 2663 2664 AMDGPUOperand::Modifiers Mods; 2665 Mods.Sext = Sext; 2666 2667 if (Mods.hasIntModifiers()) { 2668 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2669 if (Op.isExpr()) { 2670 Error(Op.getStartLoc(), "expected an absolute expression"); 2671 return MatchOperand_ParseFail; 2672 } 2673 Op.setModifiers(Mods); 2674 } 2675 2676 return MatchOperand_Success; 2677 } 2678 2679 OperandMatchResultTy 2680 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2681 return parseRegOrImmWithFPInputMods(Operands, false); 2682 } 2683 2684 OperandMatchResultTy 2685 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2686 return parseRegOrImmWithIntInputMods(Operands, false); 2687 } 2688 2689 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2690 auto Loc = getLoc(); 2691 if (trySkipId("off")) { 2692 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2693 AMDGPUOperand::ImmTyOff, false)); 2694 return MatchOperand_Success; 2695 } 2696 2697 if (!isRegister()) 2698 return MatchOperand_NoMatch; 2699 2700 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2701 if (Reg) { 2702 Operands.push_back(std::move(Reg)); 2703 return MatchOperand_Success; 2704 } 2705 2706 return MatchOperand_ParseFail; 2707 2708 } 2709 2710 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2711 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2712 2713 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2714 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2715 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2716 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2717 return Match_InvalidOperand; 2718 2719 if ((TSFlags & SIInstrFlags::VOP3) && 2720 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2721 getForcedEncodingSize() != 64) 2722 return Match_PreferE32; 2723 2724 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2725 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2726 // v_mac_f32/16 allow only dst_sel == DWORD; 2727 auto OpNum = 2728 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2729 const auto &Op = Inst.getOperand(OpNum); 2730 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2731 return Match_InvalidOperand; 2732 } 2733 } 2734 2735 return Match_Success; 2736 } 2737 2738 // What asm variants we should check 2739 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2740 if (getForcedEncodingSize() == 32) { 2741 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2742 return makeArrayRef(Variants); 2743 } 2744 2745 if (isForcedVOP3()) { 2746 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2747 return makeArrayRef(Variants); 2748 } 2749 2750 if (isForcedSDWA()) { 2751 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2752 AMDGPUAsmVariants::SDWA9}; 2753 return makeArrayRef(Variants); 2754 } 2755 2756 if (isForcedDPP()) { 2757 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2758 return makeArrayRef(Variants); 2759 } 2760 2761 static const unsigned Variants[] = { 2762 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2763 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2764 }; 2765 2766 return makeArrayRef(Variants); 2767 } 2768 2769 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2770 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2771 const unsigned Num = Desc.getNumImplicitUses(); 2772 for (unsigned i = 0; i < Num; ++i) { 2773 unsigned Reg = Desc.ImplicitUses[i]; 2774 switch (Reg) { 2775 case AMDGPU::FLAT_SCR: 2776 case AMDGPU::VCC: 2777 case AMDGPU::VCC_LO: 2778 case AMDGPU::VCC_HI: 2779 case AMDGPU::M0: 2780 return Reg; 2781 default: 2782 break; 2783 } 2784 } 2785 return AMDGPU::NoRegister; 2786 } 2787 2788 // NB: This code is correct only when used to check constant 2789 // bus limitations because GFX7 supports no f16 inline constants. 2790 // Note that there are no cases when a GFX7 opcode violates 2791 // constant bus limitations due to the use of an f16 constant.
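// Illustrative example (hypothetical values): for a 4-byte source operand, a
// literal of 64 is an inline constant (the integer inline range is -16..64
// inclusive), so it does not occupy the constant bus, whereas 65 would.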
2792 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2793 unsigned OpIdx) const { 2794 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2795 2796 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2797 return false; 2798 } 2799 2800 const MCOperand &MO = Inst.getOperand(OpIdx); 2801 2802 int64_t Val = MO.getImm(); 2803 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2804 2805 switch (OpSize) { // expected operand size 2806 case 8: 2807 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2808 case 4: 2809 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2810 case 2: { 2811 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2812 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2813 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2814 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2815 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2816 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2817 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2818 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2819 } else { 2820 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2821 } 2822 } 2823 default: 2824 llvm_unreachable("invalid operand size"); 2825 } 2826 } 2827 2828 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2829 if (!isGFX10()) 2830 return 1; 2831 2832 switch (Opcode) { 2833 // 64-bit shift instructions can use only one scalar value input 2834 case AMDGPU::V_LSHLREV_B64: 2835 case AMDGPU::V_LSHLREV_B64_gfx10: 2836 case AMDGPU::V_LSHL_B64: 2837 case AMDGPU::V_LSHRREV_B64: 2838 case AMDGPU::V_LSHRREV_B64_gfx10: 2839 case AMDGPU::V_LSHR_B64: 2840 case AMDGPU::V_ASHRREV_I64: 2841 case AMDGPU::V_ASHRREV_I64_gfx10: 2842 case AMDGPU::V_ASHR_I64: 2843 return 1; 2844 default: 2845 return 2; 2846 } 2847 } 2848 2849 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2850 const MCOperand &MO = Inst.getOperand(OpIdx); 2851 if (MO.isImm()) { 2852 return !isInlineConstant(Inst, OpIdx); 2853 } else if (MO.isReg()) { 2854 auto Reg = MO.getReg(); 2855 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2856 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2857 } else { 2858 return true; 2859 } 2860 } 2861 2862 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2863 const unsigned Opcode = Inst.getOpcode(); 2864 const MCInstrDesc &Desc = MII.get(Opcode); 2865 unsigned ConstantBusUseCount = 0; 2866 unsigned NumLiterals = 0; 2867 unsigned LiteralSize; 2868 2869 if (Desc.TSFlags & 2870 (SIInstrFlags::VOPC | 2871 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2872 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2873 SIInstrFlags::SDWA)) { 2874 // Check special imm operands (used by madmk, etc) 2875 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2876 ++ConstantBusUseCount; 2877 } 2878 2879 SmallDenseSet<unsigned> SGPRsUsed; 2880 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2881 if (SGPRUsed != AMDGPU::NoRegister) { 2882 SGPRsUsed.insert(SGPRUsed); 2883 ++ConstantBusUseCount; 2884 } 2885 2886 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2887 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2888 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2889 2890 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2891 2892 for (int OpIdx : OpIndices) { 2893 if (OpIdx == -1) break; 2894 2895 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2896 if (usesConstantBus(Inst, OpIdx)) { 2897 if (MO.isReg()) { 2898 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2899 // Pairs of registers with a partial intersections like these 2900 // s0, s[0:1] 2901 // flat_scratch_lo, flat_scratch 2902 // flat_scratch_lo, flat_scratch_hi 2903 // are theoretically valid but they are disabled anyway. 2904 // Note that this code mimics SIInstrInfo::verifyInstruction 2905 if (!SGPRsUsed.count(Reg)) { 2906 SGPRsUsed.insert(Reg); 2907 ++ConstantBusUseCount; 2908 } 2909 } else { // Expression or a literal 2910 2911 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2912 continue; // special operand like VINTERP attr_chan 2913 2914 // An instruction may use only one literal. 2915 // This has been validated on the previous step. 2916 // See validateVOP3Literal. 2917 // This literal may be used as more than one operand. 2918 // If all these operands are of the same size, 2919 // this literal counts as one scalar value. 2920 // Otherwise it counts as 2 scalar values. 2921 // See "GFX10 Shader Programming", section 3.6.2.3. 2922 2923 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2924 if (Size < 4) Size = 4; 2925 2926 if (NumLiterals == 0) { 2927 NumLiterals = 1; 2928 LiteralSize = Size; 2929 } else if (LiteralSize != Size) { 2930 NumLiterals = 2; 2931 } 2932 } 2933 } 2934 } 2935 } 2936 ConstantBusUseCount += NumLiterals; 2937 2938 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2939 } 2940 2941 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2942 const unsigned Opcode = Inst.getOpcode(); 2943 const MCInstrDesc &Desc = MII.get(Opcode); 2944 2945 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2946 if (DstIdx == -1 || 2947 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2948 return true; 2949 } 2950 2951 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2952 2953 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2954 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2955 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2956 2957 assert(DstIdx != -1); 2958 const MCOperand &Dst = Inst.getOperand(DstIdx); 2959 assert(Dst.isReg()); 2960 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2961 2962 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2963 2964 for (int SrcIdx : SrcIndices) { 2965 if (SrcIdx == -1) break; 2966 const MCOperand &Src = Inst.getOperand(SrcIdx); 2967 if (Src.isReg()) { 2968 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2969 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2970 return false; 2971 } 2972 } 2973 } 2974 2975 return true; 2976 } 2977 2978 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2979 2980 const unsigned Opc = Inst.getOpcode(); 2981 const MCInstrDesc &Desc = MII.get(Opc); 2982 2983 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2984 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2985 assert(ClampIdx != -1); 2986 return Inst.getOperand(ClampIdx).getImm() == 0; 2987 } 2988 2989 return true; 2990 } 2991 2992 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2993 2994 const unsigned Opc = Inst.getOpcode(); 2995 const MCInstrDesc &Desc = MII.get(Opc); 2996 2997 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2998 return true; 2999 3000 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3001 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3002 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3003 3004 assert(VDataIdx != -1); 3005 assert(DMaskIdx != -1); 3006 assert(TFEIdx != -1); 3007 3008 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3009 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3010 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3011 if (DMask == 0) 3012 DMask = 1; 3013 3014 unsigned DataSize = 3015 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3016 if (hasPackedD16()) { 3017 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3018 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3019 DataSize = (DataSize + 1) / 2; 3020 } 3021 3022 return (VDataSize / 4) == DataSize + TFESize; 3023 } 3024 3025 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3026 const unsigned Opc = Inst.getOpcode(); 3027 const MCInstrDesc &Desc = MII.get(Opc); 3028 3029 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3030 return true; 3031 3032 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3033 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3034 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3035 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3036 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3037 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3038 3039 assert(VAddr0Idx != -1); 3040 assert(SrsrcIdx != -1); 3041 assert(DimIdx != -1); 3042 assert(SrsrcIdx > VAddr0Idx); 3043 3044 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3045 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3046 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3047 unsigned VAddrSize = 3048 IsNSA ? SrsrcIdx - VAddr0Idx 3049 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3050 3051 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3052 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3053 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3054 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3055 if (!IsNSA) { 3056 if (AddrSize > 8) 3057 AddrSize = 16; 3058 else if (AddrSize > 4) 3059 AddrSize = 8; 3060 } 3061 3062 return VAddrSize == AddrSize; 3063 } 3064 3065 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3066 3067 const unsigned Opc = Inst.getOpcode(); 3068 const MCInstrDesc &Desc = MII.get(Opc); 3069 3070 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3071 return true; 3072 if (!Desc.mayLoad() || !Desc.mayStore()) 3073 return true; // Not atomic 3074 3075 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3076 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3077 3078 // This is an incomplete check because image_atomic_cmpswap 3079 // may only use 0x3 and 0xf while other atomic operations 3080 // may use 0x1 and 0x3. However these limitations are 3081 // verified when we check that dmask matches dst size. 
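// Illustrative examples (hypothetical operands): image_atomic_add with
// dmask:0x1 and image_atomic_cmpswap with dmask:0x3 pass this check;
// dmask:0x5 is rejected.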
3082 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3083 } 3084 3085 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3086 3087 const unsigned Opc = Inst.getOpcode(); 3088 const MCInstrDesc &Desc = MII.get(Opc); 3089 3090 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3091 return true; 3092 3093 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3094 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3095 3096 // GATHER4 instructions use dmask in a different fashion compared to 3097 // other MIMG instructions. The only useful DMASK values are 3098 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3099 // (red,red,red,red) etc.) The ISA document doesn't mention 3100 // this. 3101 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3102 } 3103 3104 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3105 { 3106 switch (Opcode) { 3107 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3108 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3109 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3110 return true; 3111 default: 3112 return false; 3113 } 3114 } 3115 3116 // movrels* opcodes should only allow VGPRS as src0. 3117 // This is specified in .td description for vop1/vop3, 3118 // but sdwa is handled differently. See isSDWAOperand. 3119 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3120 3121 const unsigned Opc = Inst.getOpcode(); 3122 const MCInstrDesc &Desc = MII.get(Opc); 3123 3124 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3125 return true; 3126 3127 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3128 assert(Src0Idx != -1); 3129 3130 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3131 if (!Src0.isReg()) 3132 return false; 3133 3134 auto Reg = Src0.getReg(); 3135 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3136 return !isSGPR(mc2PseudoReg(Reg), TRI); 3137 } 3138 3139 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3140 3141 const unsigned Opc = Inst.getOpcode(); 3142 const MCInstrDesc &Desc = MII.get(Opc); 3143 3144 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3145 return true; 3146 3147 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3148 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3149 if (isCI() || isSI()) 3150 return false; 3151 } 3152 3153 return true; 3154 } 3155 3156 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3157 const unsigned Opc = Inst.getOpcode(); 3158 const MCInstrDesc &Desc = MII.get(Opc); 3159 3160 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3161 return true; 3162 3163 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3164 if (DimIdx < 0) 3165 return true; 3166 3167 long Imm = Inst.getOperand(DimIdx).getImm(); 3168 if (Imm < 0 || Imm >= 8) 3169 return false; 3170 3171 return true; 3172 } 3173 3174 static bool IsRevOpcode(const unsigned Opcode) 3175 { 3176 switch (Opcode) { 3177 case AMDGPU::V_SUBREV_F32_e32: 3178 case AMDGPU::V_SUBREV_F32_e64: 3179 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3180 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3181 case AMDGPU::V_SUBREV_F32_e32_vi: 3182 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3183 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3184 case AMDGPU::V_SUBREV_F32_e64_vi: 3185 3186 case AMDGPU::V_SUBREV_I32_e32: 3187 case AMDGPU::V_SUBREV_I32_e64: 3188 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3189 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3190 3191 case AMDGPU::V_SUBBREV_U32_e32: 3192 case AMDGPU::V_SUBBREV_U32_e64: 3193 case 
AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3194 case AMDGPU::V_SUBBREV_U32_e32_vi: 3195 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3196 case AMDGPU::V_SUBBREV_U32_e64_vi: 3197 3198 case AMDGPU::V_SUBREV_U32_e32: 3199 case AMDGPU::V_SUBREV_U32_e64: 3200 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3201 case AMDGPU::V_SUBREV_U32_e32_vi: 3202 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3203 case AMDGPU::V_SUBREV_U32_e64_vi: 3204 3205 case AMDGPU::V_SUBREV_F16_e32: 3206 case AMDGPU::V_SUBREV_F16_e64: 3207 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3208 case AMDGPU::V_SUBREV_F16_e32_vi: 3209 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3210 case AMDGPU::V_SUBREV_F16_e64_vi: 3211 3212 case AMDGPU::V_SUBREV_U16_e32: 3213 case AMDGPU::V_SUBREV_U16_e64: 3214 case AMDGPU::V_SUBREV_U16_e32_vi: 3215 case AMDGPU::V_SUBREV_U16_e64_vi: 3216 3217 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3218 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3219 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3220 3221 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3222 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3223 3224 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3225 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3226 3227 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3228 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3229 3230 case AMDGPU::V_LSHRREV_B32_e32: 3231 case AMDGPU::V_LSHRREV_B32_e64: 3232 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3233 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3234 case AMDGPU::V_LSHRREV_B32_e32_vi: 3235 case AMDGPU::V_LSHRREV_B32_e64_vi: 3236 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3237 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3238 3239 case AMDGPU::V_ASHRREV_I32_e32: 3240 case AMDGPU::V_ASHRREV_I32_e64: 3241 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3242 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3243 case AMDGPU::V_ASHRREV_I32_e32_vi: 3244 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3245 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3246 case AMDGPU::V_ASHRREV_I32_e64_vi: 3247 3248 case AMDGPU::V_LSHLREV_B32_e32: 3249 case AMDGPU::V_LSHLREV_B32_e64: 3250 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3251 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3252 case AMDGPU::V_LSHLREV_B32_e32_vi: 3253 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3254 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3255 case AMDGPU::V_LSHLREV_B32_e64_vi: 3256 3257 case AMDGPU::V_LSHLREV_B16_e32: 3258 case AMDGPU::V_LSHLREV_B16_e64: 3259 case AMDGPU::V_LSHLREV_B16_e32_vi: 3260 case AMDGPU::V_LSHLREV_B16_e64_vi: 3261 case AMDGPU::V_LSHLREV_B16_gfx10: 3262 3263 case AMDGPU::V_LSHRREV_B16_e32: 3264 case AMDGPU::V_LSHRREV_B16_e64: 3265 case AMDGPU::V_LSHRREV_B16_e32_vi: 3266 case AMDGPU::V_LSHRREV_B16_e64_vi: 3267 case AMDGPU::V_LSHRREV_B16_gfx10: 3268 3269 case AMDGPU::V_ASHRREV_I16_e32: 3270 case AMDGPU::V_ASHRREV_I16_e64: 3271 case AMDGPU::V_ASHRREV_I16_e32_vi: 3272 case AMDGPU::V_ASHRREV_I16_e64_vi: 3273 case AMDGPU::V_ASHRREV_I16_gfx10: 3274 3275 case AMDGPU::V_LSHLREV_B64: 3276 case AMDGPU::V_LSHLREV_B64_gfx10: 3277 case AMDGPU::V_LSHLREV_B64_vi: 3278 3279 case AMDGPU::V_LSHRREV_B64: 3280 case AMDGPU::V_LSHRREV_B64_gfx10: 3281 case AMDGPU::V_LSHRREV_B64_vi: 3282 3283 case AMDGPU::V_ASHRREV_I64: 3284 case AMDGPU::V_ASHRREV_I64_gfx10: 3285 case AMDGPU::V_ASHRREV_I64_vi: 3286 3287 case AMDGPU::V_PK_LSHLREV_B16: 3288 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3289 case AMDGPU::V_PK_LSHLREV_B16_vi: 3290 3291 case AMDGPU::V_PK_LSHRREV_B16: 3292 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3293 case AMDGPU::V_PK_LSHRREV_B16_vi: 3294 case AMDGPU::V_PK_ASHRREV_I16: 3295 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3296 case AMDGPU::V_PK_ASHRREV_I16_vi: 3297 return true; 
3298 default: 3299 return false; 3300 } 3301 } 3302 3303 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3304 3305 using namespace SIInstrFlags; 3306 const unsigned Opcode = Inst.getOpcode(); 3307 const MCInstrDesc &Desc = MII.get(Opcode); 3308 3309 // lds_direct register is defined so that it can be used 3310 // with 9-bit operands only. Ignore encodings which do not accept these. 3311 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3312 return true; 3313 3314 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3315 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3316 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3317 3318 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3319 3320 // lds_direct cannot be specified as either src1 or src2. 3321 for (int SrcIdx : SrcIndices) { 3322 if (SrcIdx == -1) break; 3323 const MCOperand &Src = Inst.getOperand(SrcIdx); 3324 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3325 return false; 3326 } 3327 } 3328 3329 if (Src0Idx == -1) 3330 return true; 3331 3332 const MCOperand &Src = Inst.getOperand(Src0Idx); 3333 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3334 return true; 3335 3336 // lds_direct is specified as src0. Check additional limitations. 3337 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3338 } 3339 3340 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3341 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3342 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3343 if (Op.isFlatOffset()) 3344 return Op.getStartLoc(); 3345 } 3346 return getLoc(); 3347 } 3348 3349 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3350 const OperandVector &Operands) { 3351 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3352 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3353 return true; 3354 3355 auto Opcode = Inst.getOpcode(); 3356 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3357 assert(OpNum != -1); 3358 3359 const auto &Op = Inst.getOperand(OpNum); 3360 if (!hasFlatOffsets() && Op.getImm() != 0) { 3361 Error(getFlatOffsetLoc(Operands), 3362 "flat offset modifier is not supported on this GPU"); 3363 return false; 3364 } 3365 3366 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3367 // For FLAT segment the offset must be positive; 3368 // MSB is ignored and forced to zero. 3369 unsigned OffsetSize = isGFX9() ? 13 : 12; 3370 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3371 if (!isIntN(OffsetSize, Op.getImm())) { 3372 Error(getFlatOffsetLoc(Operands), 3373 isGFX9() ? "expected a 13-bit signed offset" : 3374 "expected a 12-bit signed offset"); 3375 return false; 3376 } 3377 } else { 3378 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3379 Error(getFlatOffsetLoc(Operands), 3380 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3381 "expected an 11-bit unsigned offset"); 3382 return false; 3383 } 3384 } 3385 3386 return true; 3387 } 3388 3389 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3390 unsigned Opcode = Inst.getOpcode(); 3391 const MCInstrDesc &Desc = MII.get(Opcode); 3392 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3393 return true; 3394 3395 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3396 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3397 3398 const int OpIndices[] = { Src0Idx, Src1Idx }; 3399 3400 unsigned NumExprs = 0; 3401 unsigned NumLiterals = 0; 3402 uint32_t LiteralValue; 3403 3404 for (int OpIdx : OpIndices) { 3405 if (OpIdx == -1) break; 3406 3407 const MCOperand &MO = Inst.getOperand(OpIdx); 3408 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3409 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3410 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3411 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3412 if (NumLiterals == 0 || LiteralValue != Value) { 3413 LiteralValue = Value; 3414 ++NumLiterals; 3415 } 3416 } else if (MO.isExpr()) { 3417 ++NumExprs; 3418 } 3419 } 3420 } 3421 3422 return NumLiterals + NumExprs <= 1; 3423 } 3424 3425 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3426 const unsigned Opc = Inst.getOpcode(); 3427 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3428 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3429 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3430 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3431 3432 if (OpSel & ~3) 3433 return false; 3434 } 3435 return true; 3436 } 3437 3438 // Check if VCC register matches wavefront size 3439 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3440 auto FB = getFeatureBits(); 3441 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3442 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3443 } 3444 3445 // VOP3 literal is only allowed in GFX10+ and only one can be used 3446 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3447 unsigned Opcode = Inst.getOpcode(); 3448 const MCInstrDesc &Desc = MII.get(Opcode); 3449 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3450 return true; 3451 3452 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3453 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3454 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3455 3456 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3457 3458 unsigned NumExprs = 0; 3459 unsigned NumLiterals = 0; 3460 uint32_t LiteralValue; 3461 3462 for (int OpIdx : OpIndices) { 3463 if (OpIdx == -1) break; 3464 3465 const MCOperand &MO = Inst.getOperand(OpIdx); 3466 if (!MO.isImm() && !MO.isExpr()) 3467 continue; 3468 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3469 continue; 3470 3471 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3472 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3473 return false; 3474 3475 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3476 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3477 if (NumLiterals == 0 || LiteralValue != Value) { 3478 LiteralValue = Value; 3479 ++NumLiterals; 3480 } 3481 } else if (MO.isExpr()) { 3482 ++NumExprs; 3483 } 3484 } 3485 NumLiterals += NumExprs; 3486 3487 return !NumLiterals || 3488 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3489 } 3490 3491 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3492 const SMLoc &IDLoc, 3493 const OperandVector &Operands) { 3494 if (!validateLdsDirect(Inst)) { 3495 Error(IDLoc, 3496 "invalid use of lds_direct"); 3497 return false; 3498 } 3499 if (!validateSOPLiteral(Inst)) { 3500 Error(IDLoc, 3501 "only one literal operand is allowed"); 3502 return false; 3503 } 3504 if (!validateVOP3Literal(Inst)) { 3505 Error(IDLoc, 3506 "invalid literal operand"); 3507 return false; 3508 } 3509 if (!validateConstantBusLimitations(Inst)) { 3510 Error(IDLoc, 3511 "invalid operand (violates constant bus restrictions)"); 3512 return false; 3513 } 3514 if (!validateEarlyClobberLimitations(Inst)) { 3515 Error(IDLoc, 3516 "destination must be different than all sources"); 3517 return false; 3518 } 3519 if (!validateIntClampSupported(Inst)) { 3520 Error(IDLoc, 3521 "integer clamping is not supported on this GPU"); 3522 return false; 3523 } 3524 if (!validateOpSel(Inst)) { 3525 Error(IDLoc, 3526 "invalid op_sel operand"); 3527 return false; 3528 } 3529 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3530 if (!validateMIMGD16(Inst)) { 3531 Error(IDLoc, 3532 "d16 modifier is not supported on this GPU"); 3533 return false; 3534 } 3535 if (!validateMIMGDim(Inst)) { 3536 Error(IDLoc, "dim modifier is required on this GPU"); 3537 return false; 3538 } 3539 if (!validateMIMGDataSize(Inst)) { 3540 Error(IDLoc, 3541 "image data size does not match dmask and tfe"); 3542 return false; 3543 } 3544 if (!validateMIMGAddrSize(Inst)) { 3545 Error(IDLoc, 3546 "image address size does not match dim and a16"); 3547 return false; 3548 } 3549 if (!validateMIMGAtomicDMask(Inst)) { 3550 Error(IDLoc, 3551 "invalid atomic image dmask"); 3552 return false; 3553 } 3554 if (!validateMIMGGatherDMask(Inst)) { 3555 Error(IDLoc, 3556 "invalid image_gather dmask: only one bit must be set"); 3557 return false; 3558 } 3559 if (!validateMovrels(Inst)) { 3560 Error(IDLoc, "source operand must be a VGPR"); 3561 return false; 3562 } 3563 if (!validateFlatOffset(Inst, Operands)) { 3564 return false; 3565 } 3566 3567 return true; 3568 } 3569 3570 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3571 const FeatureBitset &FBS, 3572 unsigned VariantID = 0); 3573 3574 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3575 OperandVector &Operands, 3576 MCStreamer &Out, 3577 uint64_t &ErrorInfo, 3578 bool MatchingInlineAsm) { 3579 MCInst Inst; 3580 unsigned Result = Match_Success; 3581 for (auto Variant : getMatchedVariants()) { 3582 uint64_t EI; 3583 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3584 Variant); 3585 // We order match statuses from least to most specific. 
We use most specific 3586 // status as resulting 3587 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3588 if ((R == Match_Success) || 3589 (R == Match_PreferE32) || 3590 (R == Match_MissingFeature && Result != Match_PreferE32) || 3591 (R == Match_InvalidOperand && Result != Match_MissingFeature 3592 && Result != Match_PreferE32) || 3593 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3594 && Result != Match_MissingFeature 3595 && Result != Match_PreferE32)) { 3596 Result = R; 3597 ErrorInfo = EI; 3598 } 3599 if (R == Match_Success) 3600 break; 3601 } 3602 3603 switch (Result) { 3604 default: break; 3605 case Match_Success: 3606 if (!validateInstruction(Inst, IDLoc, Operands)) { 3607 return true; 3608 } 3609 Inst.setLoc(IDLoc); 3610 Out.emitInstruction(Inst, getSTI()); 3611 return false; 3612 3613 case Match_MissingFeature: 3614 return Error(IDLoc, "instruction not supported on this GPU"); 3615 3616 case Match_MnemonicFail: { 3617 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3618 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3619 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3620 return Error(IDLoc, "invalid instruction" + Suggestion, 3621 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3622 } 3623 3624 case Match_InvalidOperand: { 3625 SMLoc ErrorLoc = IDLoc; 3626 if (ErrorInfo != ~0ULL) { 3627 if (ErrorInfo >= Operands.size()) { 3628 return Error(IDLoc, "too few operands for instruction"); 3629 } 3630 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3631 if (ErrorLoc == SMLoc()) 3632 ErrorLoc = IDLoc; 3633 } 3634 return Error(ErrorLoc, "invalid operand for instruction"); 3635 } 3636 3637 case Match_PreferE32: 3638 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3639 "should be encoded as e32"); 3640 } 3641 llvm_unreachable("Implement any new match types added!"); 3642 } 3643 3644 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3645 int64_t Tmp = -1; 3646 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3647 return true; 3648 } 3649 if (getParser().parseAbsoluteExpression(Tmp)) { 3650 return true; 3651 } 3652 Ret = static_cast<uint32_t>(Tmp); 3653 return false; 3654 } 3655 3656 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3657 uint32_t &Minor) { 3658 if (ParseAsAbsoluteExpression(Major)) 3659 return TokError("invalid major version"); 3660 3661 if (getLexer().isNot(AsmToken::Comma)) 3662 return TokError("minor version number required, comma expected"); 3663 Lex(); 3664 3665 if (ParseAsAbsoluteExpression(Minor)) 3666 return TokError("invalid minor version"); 3667 3668 return false; 3669 } 3670 3671 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3672 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3673 return TokError("directive only supported for amdgcn architecture"); 3674 3675 std::string Target; 3676 3677 SMLoc TargetStart = getTok().getLoc(); 3678 if (getParser().parseEscapedString(Target)) 3679 return true; 3680 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3681 3682 std::string ExpectedTarget; 3683 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3684 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3685 3686 if (Target != ExpectedTargetOS.str()) 3687 return getParser().Error(TargetRange.Start, "target must match options", 3688 TargetRange); 3689 3690 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3691 return false; 3692 } 3693 3694 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3695 return getParser().Error(Range.Start, "value out of range", Range); 3696 } 3697 3698 bool AMDGPUAsmParser::calculateGPRBlocks( 3699 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3700 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3701 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3702 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3703 // TODO(scott.linder): These calculations are duplicated from 3704 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3705 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3706 3707 unsigned NumVGPRs = NextFreeVGPR; 3708 unsigned NumSGPRs = NextFreeSGPR; 3709 3710 if (Version.Major >= 10) 3711 NumSGPRs = 0; 3712 else { 3713 unsigned MaxAddressableNumSGPRs = 3714 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3715 3716 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3717 NumSGPRs > MaxAddressableNumSGPRs) 3718 return OutOfRangeError(SGPRRange); 3719 3720 NumSGPRs += 3721 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3722 3723 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3724 NumSGPRs > MaxAddressableNumSGPRs) 3725 return OutOfRangeError(SGPRRange); 3726 3727 if (Features.test(FeatureSGPRInitBug)) 3728 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3729 } 3730 3731 VGPRBlocks = 3732 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3733 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3734 3735 return false; 3736 } 3737 3738 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3739 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3740 return TokError("directive only supported for amdgcn architecture"); 3741 3742 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3743 return TokError("directive only supported for amdhsa OS"); 3744 3745 StringRef KernelName; 3746 if (getParser().parseIdentifier(KernelName)) 3747 return true; 3748 3749 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3750 3751 StringSet<> Seen; 3752 3753 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3754 3755 SMRange VGPRRange; 3756 uint64_t NextFreeVGPR = 0; 3757 SMRange SGPRRange; 3758 uint64_t NextFreeSGPR = 0; 3759 unsigned UserSGPRCount = 0; 3760 bool ReserveVCC = true; 3761 bool ReserveFlatScr = true; 3762 bool ReserveXNACK = hasXNACK(); 3763 Optional<bool> EnableWavefrontSize32; 3764 3765 while (true) { 3766 while (getLexer().is(AsmToken::EndOfStatement)) 3767 Lex(); 3768 3769 if (getLexer().isNot(AsmToken::Identifier)) 3770 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3771 3772 StringRef ID = getTok().getIdentifier(); 3773 SMRange IDRange = getTok().getLocRange(); 3774 Lex(); 3775 3776 if (ID == ".end_amdhsa_kernel") 3777 break; 3778 3779 if (Seen.find(ID) != Seen.end()) 3780 return TokError(".amdhsa_ directives cannot be repeated"); 3781 Seen.insert(ID); 3782 3783 SMLoc ValStart = getTok().getLoc(); 3784 int64_t IVal; 3785 if (getParser().parseAbsoluteExpression(IVal)) 3786 return true; 3787 SMLoc ValEnd = getTok().getLoc(); 3788 SMRange ValRange = SMRange(ValStart, ValEnd); 3789 3790 if (IVal < 0) 3791 return OutOfRangeError(ValRange); 3792 3793 uint64_t Val = IVal; 3794 3795 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3796 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3797 return OutOfRangeError(RANGE); \ 3798 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3799 3800 if (ID == 
".amdhsa_group_segment_fixed_size") { 3801 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3802 return OutOfRangeError(ValRange); 3803 KD.group_segment_fixed_size = Val; 3804 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3805 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3806 return OutOfRangeError(ValRange); 3807 KD.private_segment_fixed_size = Val; 3808 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3809 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3810 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3811 Val, ValRange); 3812 if (Val) 3813 UserSGPRCount += 4; 3814 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3815 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3816 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3817 ValRange); 3818 if (Val) 3819 UserSGPRCount += 2; 3820 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3821 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3822 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3823 ValRange); 3824 if (Val) 3825 UserSGPRCount += 2; 3826 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3827 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3828 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3829 Val, ValRange); 3830 if (Val) 3831 UserSGPRCount += 2; 3832 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3833 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3834 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3835 ValRange); 3836 if (Val) 3837 UserSGPRCount += 2; 3838 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3839 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3840 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3841 ValRange); 3842 if (Val) 3843 UserSGPRCount += 2; 3844 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3845 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3846 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3847 Val, ValRange); 3848 if (Val) 3849 UserSGPRCount += 1; 3850 } else if (ID == ".amdhsa_wavefront_size32") { 3851 if (IVersion.Major < 10) 3852 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3853 IDRange); 3854 EnableWavefrontSize32 = Val; 3855 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3856 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3857 Val, ValRange); 3858 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3859 PARSE_BITS_ENTRY( 3860 KD.compute_pgm_rsrc2, 3861 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3862 ValRange); 3863 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3865 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3866 ValRange); 3867 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3868 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3869 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3870 ValRange); 3871 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3872 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3873 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3874 ValRange); 3875 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3876 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3877 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3878 ValRange); 3879 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3880 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3881 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3882 ValRange); 3883 } else if (ID == ".amdhsa_next_free_vgpr") { 3884 VGPRRange = ValRange; 3885 NextFreeVGPR = Val; 3886 } else if (ID == 
".amdhsa_next_free_sgpr") { 3887 SGPRRange = ValRange; 3888 NextFreeSGPR = Val; 3889 } else if (ID == ".amdhsa_reserve_vcc") { 3890 if (!isUInt<1>(Val)) 3891 return OutOfRangeError(ValRange); 3892 ReserveVCC = Val; 3893 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3894 if (IVersion.Major < 7) 3895 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3896 IDRange); 3897 if (!isUInt<1>(Val)) 3898 return OutOfRangeError(ValRange); 3899 ReserveFlatScr = Val; 3900 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3901 if (IVersion.Major < 8) 3902 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3903 IDRange); 3904 if (!isUInt<1>(Val)) 3905 return OutOfRangeError(ValRange); 3906 ReserveXNACK = Val; 3907 } else if (ID == ".amdhsa_float_round_mode_32") { 3908 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3909 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3910 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3911 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3912 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3913 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3914 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3915 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3916 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3917 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3918 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3919 ValRange); 3920 } else if (ID == ".amdhsa_dx10_clamp") { 3921 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3922 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3923 } else if (ID == ".amdhsa_ieee_mode") { 3924 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3925 Val, ValRange); 3926 } else if (ID == ".amdhsa_fp16_overflow") { 3927 if (IVersion.Major < 9) 3928 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3929 IDRange); 3930 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3931 ValRange); 3932 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3933 if (IVersion.Major < 10) 3934 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3935 IDRange); 3936 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3937 ValRange); 3938 } else if (ID == ".amdhsa_memory_ordered") { 3939 if (IVersion.Major < 10) 3940 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3941 IDRange); 3942 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3943 ValRange); 3944 } else if (ID == ".amdhsa_forward_progress") { 3945 if (IVersion.Major < 10) 3946 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3947 IDRange); 3948 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3949 ValRange); 3950 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3951 PARSE_BITS_ENTRY( 3952 KD.compute_pgm_rsrc2, 3953 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3954 ValRange); 3955 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3956 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3957 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3958 Val, ValRange); 3959 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3960 PARSE_BITS_ENTRY( 3961 KD.compute_pgm_rsrc2, 3962 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3963 ValRange); 3964 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3965 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3966 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3967 Val, ValRange); 3968 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3969 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3970 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3971 Val, ValRange); 3972 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3973 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3974 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3975 Val, ValRange); 3976 } else if (ID == ".amdhsa_exception_int_div_zero") { 3977 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3978 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3979 Val, ValRange); 3980 } else { 3981 return getParser().Error(IDRange.Start, 3982 "unknown .amdhsa_kernel directive", IDRange); 3983 } 3984 3985 #undef PARSE_BITS_ENTRY 3986 } 3987 3988 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3989 return TokError(".amdhsa_next_free_vgpr directive is required"); 3990 3991 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3992 return TokError(".amdhsa_next_free_sgpr directive is required"); 3993 3994 unsigned VGPRBlocks; 3995 unsigned SGPRBlocks; 3996 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3997 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3998 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3999 SGPRBlocks)) 4000 return true; 4001 4002 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4003 VGPRBlocks)) 4004 return OutOfRangeError(VGPRRange); 4005 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4006 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4007 4008 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4009 SGPRBlocks)) 4010 return OutOfRangeError(SGPRRange); 4011 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4012 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4013 SGPRBlocks); 4014 4015 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4016 return TokError("too many user SGPRs enabled"); 4017 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4018 UserSGPRCount); 4019 4020 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4021 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4022 ReserveFlatScr, ReserveXNACK); 4023 return false; 4024 } 4025 4026 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4027 uint32_t Major; 4028 uint32_t Minor; 4029 4030 if (ParseDirectiveMajorMinor(Major, Minor)) 4031 return true; 4032 4033 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4034 return false; 4035 } 4036 4037 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4038 uint32_t Major; 4039 uint32_t Minor; 4040 uint32_t Stepping; 4041 StringRef VendorName; 4042 StringRef ArchName; 4043 4044 // If this directive has no arguments, then use the ISA version for the 4045 // targeted GPU. 
4046 if (getLexer().is(AsmToken::EndOfStatement)) { 4047 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4048 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4049 ISA.Stepping, 4050 "AMD", "AMDGPU"); 4051 return false; 4052 } 4053 4054 if (ParseDirectiveMajorMinor(Major, Minor)) 4055 return true; 4056 4057 if (getLexer().isNot(AsmToken::Comma)) 4058 return TokError("stepping version number required, comma expected"); 4059 Lex(); 4060 4061 if (ParseAsAbsoluteExpression(Stepping)) 4062 return TokError("invalid stepping version"); 4063 4064 if (getLexer().isNot(AsmToken::Comma)) 4065 return TokError("vendor name required, comma expected"); 4066 Lex(); 4067 4068 if (getLexer().isNot(AsmToken::String)) 4069 return TokError("invalid vendor name"); 4070 4071 VendorName = getLexer().getTok().getStringContents(); 4072 Lex(); 4073 4074 if (getLexer().isNot(AsmToken::Comma)) 4075 return TokError("arch name required, comma expected"); 4076 Lex(); 4077 4078 if (getLexer().isNot(AsmToken::String)) 4079 return TokError("invalid arch name"); 4080 4081 ArchName = getLexer().getTok().getStringContents(); 4082 Lex(); 4083 4084 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4085 VendorName, ArchName); 4086 return false; 4087 } 4088 4089 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4090 amd_kernel_code_t &Header) { 4091 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4092 // assembly for backwards compatibility. 4093 if (ID == "max_scratch_backing_memory_byte_size") { 4094 Parser.eatToEndOfStatement(); 4095 return false; 4096 } 4097 4098 SmallString<40> ErrStr; 4099 raw_svector_ostream Err(ErrStr); 4100 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4101 return TokError(Err.str()); 4102 } 4103 Lex(); 4104 4105 if (ID == "enable_wavefront_size32") { 4106 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4107 if (!isGFX10()) 4108 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4109 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4110 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4111 } else { 4112 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4113 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4114 } 4115 } 4116 4117 if (ID == "wavefront_size") { 4118 if (Header.wavefront_size == 5) { 4119 if (!isGFX10()) 4120 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4121 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4122 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4123 } else if (Header.wavefront_size == 6) { 4124 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4125 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4126 } 4127 } 4128 4129 if (ID == "enable_wgp_mode") { 4130 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4131 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4132 } 4133 4134 if (ID == "enable_mem_ordered") { 4135 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4136 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4137 } 4138 4139 if (ID == "enable_fwd_progress") { 4140 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4141 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4142 } 4143 4144 return false; 4145 } 4146 4147 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4148 amd_kernel_code_t Header; 4149 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4150 4151 while (true) { 4152 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4153 // will set the current token to EndOfStatement. 4154 while(getLexer().is(AsmToken::EndOfStatement)) 4155 Lex(); 4156 4157 if (getLexer().isNot(AsmToken::Identifier)) 4158 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4159 4160 StringRef ID = getLexer().getTok().getIdentifier(); 4161 Lex(); 4162 4163 if (ID == ".end_amd_kernel_code_t") 4164 break; 4165 4166 if (ParseAMDKernelCodeTValue(ID, Header)) 4167 return true; 4168 } 4169 4170 getTargetStreamer().EmitAMDKernelCodeT(Header); 4171 4172 return false; 4173 } 4174 4175 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4176 if (getLexer().isNot(AsmToken::Identifier)) 4177 return TokError("expected symbol name"); 4178 4179 StringRef KernelName = Parser.getTok().getString(); 4180 4181 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4182 ELF::STT_AMDGPU_HSA_KERNEL); 4183 Lex(); 4184 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4185 KernelScope.initialize(getContext()); 4186 return false; 4187 } 4188 4189 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4190 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4191 return Error(getParser().getTok().getLoc(), 4192 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4193 "architectures"); 4194 } 4195 4196 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4197 4198 std::string ISAVersionStringFromSTI; 4199 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4200 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4201 4202 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4203 return Error(getParser().getTok().getLoc(), 4204 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4205 "arguments specified through the command line"); 4206 } 4207 4208 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4209 Lex(); 4210 4211 return false; 4212 } 4213 4214 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4215 const char *AssemblerDirectiveBegin; 4216 const char *AssemblerDirectiveEnd; 4217 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4218 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4219 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4220 HSAMD::V3::AssemblerDirectiveEnd) 4221 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4222 HSAMD::AssemblerDirectiveEnd); 4223 4224 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4225 return Error(getParser().getTok().getLoc(), 4226 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4227 "not available on non-amdhsa OSes")).str()); 4228 } 4229 4230 std::string HSAMetadataString; 4231 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4232 HSAMetadataString)) 4233 return true; 4234 4235 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4236 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4237 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4238 } else { 4239 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4240 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4241 } 4242 4243 return false; 4244 } 4245 4246 /// Common code to parse out a block of text (typically YAML) between start and 4247 /// end directives. 
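/// The collected region has the following shape (the directive names are
/// placeholders; the actual begin/end strings are supplied by the callers,
/// e.g. the HSA and PAL metadata parsers):
///   <AssemblerDirectiveBegin>
///     ...text collected verbatim, whitespace preserved, statements joined
///     with the target's statement separator string...
///   <AssemblerDirectiveEnd>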
4248 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4249 const char *AssemblerDirectiveEnd, 4250 std::string &CollectString) { 4251 4252 raw_string_ostream CollectStream(CollectString); 4253 4254 getLexer().setSkipSpace(false); 4255 4256 bool FoundEnd = false; 4257 while (!getLexer().is(AsmToken::Eof)) { 4258 while (getLexer().is(AsmToken::Space)) { 4259 CollectStream << getLexer().getTok().getString(); 4260 Lex(); 4261 } 4262 4263 if (getLexer().is(AsmToken::Identifier)) { 4264 StringRef ID = getLexer().getTok().getIdentifier(); 4265 if (ID == AssemblerDirectiveEnd) { 4266 Lex(); 4267 FoundEnd = true; 4268 break; 4269 } 4270 } 4271 4272 CollectStream << Parser.parseStringToEndOfStatement() 4273 << getContext().getAsmInfo()->getSeparatorString(); 4274 4275 Parser.eatToEndOfStatement(); 4276 } 4277 4278 getLexer().setSkipSpace(true); 4279 4280 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4281 return TokError(Twine("expected directive ") + 4282 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4283 } 4284 4285 CollectStream.flush(); 4286 return false; 4287 } 4288 4289 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4290 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4291 std::string String; 4292 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4293 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4294 return true; 4295 4296 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4297 if (!PALMetadata->setFromString(String)) 4298 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4299 return false; 4300 } 4301 4302 /// Parse the assembler directive for old linear-format PAL metadata. 4303 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4304 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4305 return Error(getParser().getTok().getLoc(), 4306 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4307 "not available on non-amdpal OSes")).str()); 4308 } 4309 4310 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4311 PALMetadata->setLegacy(); 4312 for (;;) { 4313 uint32_t Key, Value; 4314 if (ParseAsAbsoluteExpression(Key)) { 4315 return TokError(Twine("invalid value in ") + 4316 Twine(PALMD::AssemblerDirective)); 4317 } 4318 if (getLexer().isNot(AsmToken::Comma)) { 4319 return TokError(Twine("expected an even number of values in ") + 4320 Twine(PALMD::AssemblerDirective)); 4321 } 4322 Lex(); 4323 if (ParseAsAbsoluteExpression(Value)) { 4324 return TokError(Twine("invalid value in ") + 4325 Twine(PALMD::AssemblerDirective)); 4326 } 4327 PALMetadata->setRegister(Key, Value); 4328 if (getLexer().isNot(AsmToken::Comma)) 4329 break; 4330 Lex(); 4331 } 4332 return false; 4333 } 4334 4335 /// ParseDirectiveAMDGPULDS 4336 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4337 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4338 if (getParser().checkForValidSection()) 4339 return true; 4340 4341 StringRef Name; 4342 SMLoc NameLoc = getLexer().getLoc(); 4343 if (getParser().parseIdentifier(Name)) 4344 return TokError("expected identifier in directive"); 4345 4346 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4347 if (parseToken(AsmToken::Comma, "expected ','")) 4348 return true; 4349 4350 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4351 4352 int64_t Size; 4353 SMLoc SizeLoc = getLexer().getLoc(); 4354 if (getParser().parseAbsoluteExpression(Size)) 4355 return true; 4356 if (Size < 0) 4357 return 
Error(SizeLoc, "size must be non-negative"); 4358 if (Size > LocalMemorySize) 4359 return Error(SizeLoc, "size is too large"); 4360 4361 int64_t Align = 4; 4362 if (getLexer().is(AsmToken::Comma)) { 4363 Lex(); 4364 SMLoc AlignLoc = getLexer().getLoc(); 4365 if (getParser().parseAbsoluteExpression(Align)) 4366 return true; 4367 if (Align < 0 || !isPowerOf2_64(Align)) 4368 return Error(AlignLoc, "alignment must be a power of two"); 4369 4370 // Alignment larger than the size of LDS is possible in theory, as long 4371 // as the linker manages to place to symbol at address 0, but we do want 4372 // to make sure the alignment fits nicely into a 32-bit integer. 4373 if (Align >= 1u << 31) 4374 return Error(AlignLoc, "alignment is too large"); 4375 } 4376 4377 if (parseToken(AsmToken::EndOfStatement, 4378 "unexpected token in '.amdgpu_lds' directive")) 4379 return true; 4380 4381 Symbol->redefineIfPossible(); 4382 if (!Symbol->isUndefined()) 4383 return Error(NameLoc, "invalid symbol redefinition"); 4384 4385 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4386 return false; 4387 } 4388 4389 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4390 StringRef IDVal = DirectiveID.getString(); 4391 4392 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4393 if (IDVal == ".amdgcn_target") 4394 return ParseDirectiveAMDGCNTarget(); 4395 4396 if (IDVal == ".amdhsa_kernel") 4397 return ParseDirectiveAMDHSAKernel(); 4398 4399 // TODO: Restructure/combine with PAL metadata directive. 4400 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4401 return ParseDirectiveHSAMetadata(); 4402 } else { 4403 if (IDVal == ".hsa_code_object_version") 4404 return ParseDirectiveHSACodeObjectVersion(); 4405 4406 if (IDVal == ".hsa_code_object_isa") 4407 return ParseDirectiveHSACodeObjectISA(); 4408 4409 if (IDVal == ".amd_kernel_code_t") 4410 return ParseDirectiveAMDKernelCodeT(); 4411 4412 if (IDVal == ".amdgpu_hsa_kernel") 4413 return ParseDirectiveAMDGPUHsaKernel(); 4414 4415 if (IDVal == ".amd_amdgpu_isa") 4416 return ParseDirectiveISAVersion(); 4417 4418 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4419 return ParseDirectiveHSAMetadata(); 4420 } 4421 4422 if (IDVal == ".amdgpu_lds") 4423 return ParseDirectiveAMDGPULDS(); 4424 4425 if (IDVal == PALMD::AssemblerDirectiveBegin) 4426 return ParseDirectivePALMetadataBegin(); 4427 4428 if (IDVal == PALMD::AssemblerDirective) 4429 return ParseDirectivePALMetadata(); 4430 4431 return true; 4432 } 4433 4434 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4435 unsigned RegNo) const { 4436 4437 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4438 R.isValid(); ++R) { 4439 if (*R == RegNo) 4440 return isGFX9() || isGFX10(); 4441 } 4442 4443 // GFX10 has 2 more SGPRs 104 and 105. 
4444 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4445 R.isValid(); ++R) { 4446 if (*R == RegNo) 4447 return hasSGPR104_SGPR105(); 4448 } 4449 4450 switch (RegNo) { 4451 case AMDGPU::SRC_SHARED_BASE: 4452 case AMDGPU::SRC_SHARED_LIMIT: 4453 case AMDGPU::SRC_PRIVATE_BASE: 4454 case AMDGPU::SRC_PRIVATE_LIMIT: 4455 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4456 return !isCI() && !isSI() && !isVI(); 4457 case AMDGPU::TBA: 4458 case AMDGPU::TBA_LO: 4459 case AMDGPU::TBA_HI: 4460 case AMDGPU::TMA: 4461 case AMDGPU::TMA_LO: 4462 case AMDGPU::TMA_HI: 4463 return !isGFX9() && !isGFX10(); 4464 case AMDGPU::XNACK_MASK: 4465 case AMDGPU::XNACK_MASK_LO: 4466 case AMDGPU::XNACK_MASK_HI: 4467 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4468 case AMDGPU::SGPR_NULL: 4469 return isGFX10(); 4470 default: 4471 break; 4472 } 4473 4474 if (isCI()) 4475 return true; 4476 4477 if (isSI() || isGFX10()) { 4478 // No flat_scr on SI. 4479 // On GFX10 flat scratch is not a valid register operand and can only be 4480 // accessed with s_setreg/s_getreg. 4481 switch (RegNo) { 4482 case AMDGPU::FLAT_SCR: 4483 case AMDGPU::FLAT_SCR_LO: 4484 case AMDGPU::FLAT_SCR_HI: 4485 return false; 4486 default: 4487 return true; 4488 } 4489 } 4490 4491 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4492 // SI/CI have. 4493 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4494 R.isValid(); ++R) { 4495 if (*R == RegNo) 4496 return hasSGPR102_SGPR103(); 4497 } 4498 4499 return true; 4500 } 4501 4502 OperandMatchResultTy 4503 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4504 OperandMode Mode) { 4505 // Try to parse with a custom parser 4506 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4507 4508 // If we successfully parsed the operand or if there as an error parsing, 4509 // we are done. 4510 // 4511 // If we are parsing after we reach EndOfStatement then this means we 4512 // are appending default values to the Operands list. This is only done 4513 // by custom parser, so we shouldn't continue on to the generic parsing. 4514 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4515 getLexer().is(AsmToken::EndOfStatement)) 4516 return ResTy; 4517 4518 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4519 unsigned Prefix = Operands.size(); 4520 SMLoc LBraceLoc = getTok().getLoc(); 4521 Parser.Lex(); // eat the '[' 4522 4523 for (;;) { 4524 ResTy = parseReg(Operands); 4525 if (ResTy != MatchOperand_Success) 4526 return ResTy; 4527 4528 if (getLexer().is(AsmToken::RBrac)) 4529 break; 4530 4531 if (getLexer().isNot(AsmToken::Comma)) 4532 return MatchOperand_ParseFail; 4533 Parser.Lex(); 4534 } 4535 4536 if (Operands.size() - Prefix > 1) { 4537 Operands.insert(Operands.begin() + Prefix, 4538 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4539 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4540 getTok().getLoc())); 4541 } 4542 4543 Parser.Lex(); // eat the ']' 4544 return MatchOperand_Success; 4545 } 4546 4547 return parseRegOrImm(Operands); 4548 } 4549 4550 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4551 // Clear any forced encodings from the previous instruction. 
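// After the reset below, the suffix checks map, for example (illustrative;
// any instruction carrying these suffixes is handled the same way):
//   "v_add_f32_e64"  -> base mnemonic "v_add_f32", 64-bit (VOP3) encoding forced
//   "v_add_f32_e32"  -> base mnemonic "v_add_f32", 32-bit encoding forced
//   "v_mov_b32_dpp"  -> base mnemonic "v_mov_b32", DPP encoding forced
//   "v_mov_b32_sdwa" -> base mnemonic "v_mov_b32", SDWA encoding forced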
4552 setForcedEncodingSize(0); 4553 setForcedDPP(false); 4554 setForcedSDWA(false); 4555 4556 if (Name.endswith("_e64")) { 4557 setForcedEncodingSize(64); 4558 return Name.substr(0, Name.size() - 4); 4559 } else if (Name.endswith("_e32")) { 4560 setForcedEncodingSize(32); 4561 return Name.substr(0, Name.size() - 4); 4562 } else if (Name.endswith("_dpp")) { 4563 setForcedDPP(true); 4564 return Name.substr(0, Name.size() - 4); 4565 } else if (Name.endswith("_sdwa")) { 4566 setForcedSDWA(true); 4567 return Name.substr(0, Name.size() - 5); 4568 } 4569 return Name; 4570 } 4571 4572 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4573 StringRef Name, 4574 SMLoc NameLoc, OperandVector &Operands) { 4575 // Add the instruction mnemonic 4576 Name = parseMnemonicSuffix(Name); 4577 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4578 4579 bool IsMIMG = Name.startswith("image_"); 4580 4581 while (!getLexer().is(AsmToken::EndOfStatement)) { 4582 OperandMode Mode = OperandMode_Default; 4583 if (IsMIMG && isGFX10() && Operands.size() == 2) 4584 Mode = OperandMode_NSA; 4585 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4586 4587 // Eat the comma or space if there is one. 4588 if (getLexer().is(AsmToken::Comma)) 4589 Parser.Lex(); 4590 4591 switch (Res) { 4592 case MatchOperand_Success: break; 4593 case MatchOperand_ParseFail: 4594 // FIXME: use real operand location rather than the current location. 4595 Error(getLexer().getLoc(), "failed parsing operand."); 4596 while (!getLexer().is(AsmToken::EndOfStatement)) { 4597 Parser.Lex(); 4598 } 4599 return true; 4600 case MatchOperand_NoMatch: 4601 // FIXME: use real operand location rather than the current location. 4602 Error(getLexer().getLoc(), "not a valid operand."); 4603 while (!getLexer().is(AsmToken::EndOfStatement)) { 4604 Parser.Lex(); 4605 } 4606 return true; 4607 } 4608 } 4609 4610 return false; 4611 } 4612 4613 //===----------------------------------------------------------------------===// 4614 // Utility functions 4615 //===----------------------------------------------------------------------===// 4616 4617 OperandMatchResultTy 4618 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4619 4620 if (!trySkipId(Prefix, AsmToken::Colon)) 4621 return MatchOperand_NoMatch; 4622 4623 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4624 } 4625 4626 OperandMatchResultTy 4627 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4628 AMDGPUOperand::ImmTy ImmTy, 4629 bool (*ConvertResult)(int64_t&)) { 4630 SMLoc S = getLoc(); 4631 int64_t Value = 0; 4632 4633 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4634 if (Res != MatchOperand_Success) 4635 return Res; 4636 4637 if (ConvertResult && !ConvertResult(Value)) { 4638 Error(S, "invalid " + StringRef(Prefix) + " value."); 4639 } 4640 4641 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4642 return MatchOperand_Success; 4643 } 4644 4645 OperandMatchResultTy 4646 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4647 OperandVector &Operands, 4648 AMDGPUOperand::ImmTy ImmTy, 4649 bool (*ConvertResult)(int64_t&)) { 4650 SMLoc S = getLoc(); 4651 if (!trySkipId(Prefix, AsmToken::Colon)) 4652 return MatchOperand_NoMatch; 4653 4654 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4655 return MatchOperand_ParseFail; 4656 4657 unsigned Val = 0; 4658 const unsigned MaxSize = 4; 4659 4660 // FIXME: How to verify the number of elements matches the number of src 4661 // operands? 4662 for (int I = 0; ; ++I) { 4663 int64_t Op; 4664 SMLoc Loc = getLoc(); 4665 if (!parseExpr(Op)) 4666 return MatchOperand_ParseFail; 4667 4668 if (Op != 0 && Op != 1) { 4669 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4670 return MatchOperand_ParseFail; 4671 } 4672 4673 Val |= (Op << I); 4674 4675 if (trySkipToken(AsmToken::RBrac)) 4676 break; 4677 4678 if (I + 1 == MaxSize) { 4679 Error(getLoc(), "expected a closing square bracket"); 4680 return MatchOperand_ParseFail; 4681 } 4682 4683 if (!skipToken(AsmToken::Comma, "expected a comma")) 4684 return MatchOperand_ParseFail; 4685 } 4686 4687 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4688 return MatchOperand_Success; 4689 } 4690 4691 OperandMatchResultTy 4692 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4693 AMDGPUOperand::ImmTy ImmTy) { 4694 int64_t Bit = 0; 4695 SMLoc S = Parser.getTok().getLoc(); 4696 4697 // We are at the end of the statement, and this is a default argument, so 4698 // use a default value. 
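// Illustrative behaviour for a bit named "glc" (any other named bit is
// handled the same way):
//   "glc"   -> Bit = 1
//   "noglc" -> Bit = 0
//   omitted -> Bit stays 0, the default chosen by the EndOfStatement check below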
4699 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4700 switch(getLexer().getKind()) { 4701 case AsmToken::Identifier: { 4702 StringRef Tok = Parser.getTok().getString(); 4703 if (Tok == Name) { 4704 if (Tok == "r128" && !hasMIMG_R128()) 4705 Error(S, "r128 modifier is not supported on this GPU"); 4706 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4707 Error(S, "a16 modifier is not supported on this GPU"); 4708 Bit = 1; 4709 Parser.Lex(); 4710 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4711 Bit = 0; 4712 Parser.Lex(); 4713 } else { 4714 return MatchOperand_NoMatch; 4715 } 4716 break; 4717 } 4718 default: 4719 return MatchOperand_NoMatch; 4720 } 4721 } 4722 4723 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4724 return MatchOperand_ParseFail; 4725 4726 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4727 ImmTy = AMDGPUOperand::ImmTyR128A16; 4728 4729 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4730 return MatchOperand_Success; 4731 } 4732 4733 static void addOptionalImmOperand( 4734 MCInst& Inst, const OperandVector& Operands, 4735 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4736 AMDGPUOperand::ImmTy ImmT, 4737 int64_t Default = 0) { 4738 auto i = OptionalIdx.find(ImmT); 4739 if (i != OptionalIdx.end()) { 4740 unsigned Idx = i->second; 4741 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4742 } else { 4743 Inst.addOperand(MCOperand::createImm(Default)); 4744 } 4745 } 4746 4747 OperandMatchResultTy 4748 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4749 if (getLexer().isNot(AsmToken::Identifier)) { 4750 return MatchOperand_NoMatch; 4751 } 4752 StringRef Tok = Parser.getTok().getString(); 4753 if (Tok != Prefix) { 4754 return MatchOperand_NoMatch; 4755 } 4756 4757 Parser.Lex(); 4758 if (getLexer().isNot(AsmToken::Colon)) { 4759 return MatchOperand_ParseFail; 4760 } 4761 4762 Parser.Lex(); 4763 if (getLexer().isNot(AsmToken::Identifier)) { 4764 return MatchOperand_ParseFail; 4765 } 4766 4767 Value = Parser.getTok().getString(); 4768 return MatchOperand_Success; 4769 } 4770 4771 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4772 // values to live in a joint format operand in the MCInst encoding. 4773 OperandMatchResultTy 4774 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4775 SMLoc S = Parser.getTok().getLoc(); 4776 int64_t Dfmt = 0, Nfmt = 0; 4777 // dfmt and nfmt can appear in either order, and each is optional. 
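// Illustrative operand forms accepted by the loop below (the surrounding
// instruction syntax is omitted): "dfmt:15, nfmt:2" and "nfmt:2, dfmt:15"
// are equivalent, and either field may be left out. The joint encoding is
// Format = Dfmt | (Nfmt << 4), as computed at the end of this function.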
4778 bool GotDfmt = false, GotNfmt = false; 4779 while (!GotDfmt || !GotNfmt) { 4780 if (!GotDfmt) { 4781 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4782 if (Res != MatchOperand_NoMatch) { 4783 if (Res != MatchOperand_Success) 4784 return Res; 4785 if (Dfmt >= 16) { 4786 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4787 return MatchOperand_ParseFail; 4788 } 4789 GotDfmt = true; 4790 Parser.Lex(); 4791 continue; 4792 } 4793 } 4794 if (!GotNfmt) { 4795 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4796 if (Res != MatchOperand_NoMatch) { 4797 if (Res != MatchOperand_Success) 4798 return Res; 4799 if (Nfmt >= 8) { 4800 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4801 return MatchOperand_ParseFail; 4802 } 4803 GotNfmt = true; 4804 Parser.Lex(); 4805 continue; 4806 } 4807 } 4808 break; 4809 } 4810 if (!GotDfmt && !GotNfmt) 4811 return MatchOperand_NoMatch; 4812 auto Format = Dfmt | Nfmt << 4; 4813 Operands.push_back( 4814 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4815 return MatchOperand_Success; 4816 } 4817 4818 //===----------------------------------------------------------------------===// 4819 // ds 4820 //===----------------------------------------------------------------------===// 4821 4822 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4823 const OperandVector &Operands) { 4824 OptionalImmIndexMap OptionalIdx; 4825 4826 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4827 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4828 4829 // Add the register arguments 4830 if (Op.isReg()) { 4831 Op.addRegOperands(Inst, 1); 4832 continue; 4833 } 4834 4835 // Handle optional arguments 4836 OptionalIdx[Op.getImmTy()] = i; 4837 } 4838 4839 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4840 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4842 4843 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4844 } 4845 4846 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4847 bool IsGdsHardcoded) { 4848 OptionalImmIndexMap OptionalIdx; 4849 4850 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4851 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4852 4853 // Add the register arguments 4854 if (Op.isReg()) { 4855 Op.addRegOperands(Inst, 1); 4856 continue; 4857 } 4858 4859 if (Op.isToken() && Op.getToken() == "gds") { 4860 IsGdsHardcoded = true; 4861 continue; 4862 } 4863 4864 // Handle optional arguments 4865 OptionalIdx[Op.getImmTy()] = i; 4866 } 4867 4868 AMDGPUOperand::ImmTy OffsetType = 4869 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4870 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4871 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4872 AMDGPUOperand::ImmTyOffset; 4873 4874 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4875 4876 if (!IsGdsHardcoded) { 4877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4878 } 4879 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4880 } 4881 4882 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4883 OptionalImmIndexMap OptionalIdx; 4884 4885 unsigned OperandIdx[4]; 4886 unsigned EnMask = 0; 4887 int SrcIdx = 0; 4888 4889 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4890 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4891 4892 // Add the register arguments 4893 if (Op.isReg()) { 4894 assert(SrcIdx < 4); 4895 OperandIdx[SrcIdx] = Inst.size(); 4896 Op.addRegOperands(Inst, 1); 4897 ++SrcIdx; 4898 continue; 4899 } 4900 4901 if (Op.isOff()) { 4902 assert(SrcIdx < 4); 4903 OperandIdx[SrcIdx] = Inst.size(); 4904 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4905 ++SrcIdx; 4906 continue; 4907 } 4908 4909 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4910 Op.addImmOperands(Inst, 1); 4911 continue; 4912 } 4913 4914 if (Op.isToken() && Op.getToken() == "done") 4915 continue; 4916 4917 // Handle optional arguments 4918 OptionalIdx[Op.getImmTy()] = i; 4919 } 4920 4921 assert(SrcIdx == 4); 4922 4923 bool Compr = false; 4924 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4925 Compr = true; 4926 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4927 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4928 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4929 } 4930 4931 for (auto i = 0; i < SrcIdx; ++i) { 4932 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4933 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4934 } 4935 } 4936 4937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4939 4940 Inst.addOperand(MCOperand::createImm(EnMask)); 4941 } 4942 4943 //===----------------------------------------------------------------------===// 4944 // s_waitcnt 4945 //===----------------------------------------------------------------------===// 4946 4947 static bool 4948 encodeCnt( 4949 const AMDGPU::IsaVersion ISA, 4950 int64_t &IntVal, 4951 int64_t CntVal, 4952 bool Saturate, 4953 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4954 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4955 { 4956 bool Failed = false; 4957 4958 IntVal = encode(ISA, IntVal, CntVal); 4959 if (CntVal != decode(ISA, IntVal)) { 4960 if (Saturate) { 4961 IntVal = encode(ISA, IntVal, -1); 4962 } else { 4963 Failed = true; 4964 } 4965 } 4966 return Failed; 4967 } 4968 4969 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4970 4971 SMLoc CntLoc = getLoc(); 4972 StringRef CntName = getTokenStr(); 4973 4974 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4975 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4976 return false; 4977 4978 int64_t CntVal; 4979 SMLoc ValLoc = getLoc(); 4980 if (!parseExpr(CntVal)) 4981 return false; 4982 4983 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4984 4985 bool Failed = true; 4986 bool Sat = CntName.endswith("_sat"); 4987 4988 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4989 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4990 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4991 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4992 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4993 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4994 } else { 4995 Error(CntLoc, "invalid counter name " + CntName); 4996 return false; 4997 } 4998 4999 if (Failed) { 5000 Error(ValLoc, "too large value for " + CntName); 5001 return false; 5002 } 5003 5004 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5005 return false; 5006 5007 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5008 if (isToken(AsmToken::EndOfStatement)) { 5009 Error(getLoc(), "expected a counter name"); 5010 return false; 5011 } 5012 } 5013 5014 return true; 5015 } 5016 5017 OperandMatchResultTy 5018 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5019 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5020 int64_t Waitcnt = getWaitcntBitMask(ISA); 5021 SMLoc S = getLoc(); 5022 5023 // If parse failed, do not return error code 5024 // to avoid excessive error messages. 
5025 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5026 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 5027 } else { 5028 parseExpr(Waitcnt); 5029 } 5030 5031 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5032 return MatchOperand_Success; 5033 } 5034 5035 bool 5036 AMDGPUOperand::isSWaitCnt() const { 5037 return isImm(); 5038 } 5039 5040 //===----------------------------------------------------------------------===// 5041 // hwreg 5042 //===----------------------------------------------------------------------===// 5043 5044 bool 5045 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5046 int64_t &Offset, 5047 int64_t &Width) { 5048 using namespace llvm::AMDGPU::Hwreg; 5049 5050 // The register may be specified by name or using a numeric code 5051 if (isToken(AsmToken::Identifier) && 5052 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5053 HwReg.IsSymbolic = true; 5054 lex(); // skip message name 5055 } else if (!parseExpr(HwReg.Id)) { 5056 return false; 5057 } 5058 5059 if (trySkipToken(AsmToken::RParen)) 5060 return true; 5061 5062 // parse optional params 5063 return 5064 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5065 parseExpr(Offset) && 5066 skipToken(AsmToken::Comma, "expected a comma") && 5067 parseExpr(Width) && 5068 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5069 } 5070 5071 bool 5072 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5073 const int64_t Offset, 5074 const int64_t Width, 5075 const SMLoc Loc) { 5076 5077 using namespace llvm::AMDGPU::Hwreg; 5078 5079 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5080 Error(Loc, "specified hardware register is not supported on this GPU"); 5081 return false; 5082 } else if (!isValidHwreg(HwReg.Id)) { 5083 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5084 return false; 5085 } else if (!isValidHwregOffset(Offset)) { 5086 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5087 return false; 5088 } else if (!isValidHwregWidth(Width)) { 5089 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5090 return false; 5091 } 5092 return true; 5093 } 5094 5095 OperandMatchResultTy 5096 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5097 using namespace llvm::AMDGPU::Hwreg; 5098 5099 int64_t ImmVal = 0; 5100 SMLoc Loc = getLoc(); 5101 5102 // If parse failed, do not return error code 5103 // to avoid excessive error messages. 
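// Illustrative operand forms accepted below (names and numbers are
// placeholders, not an exhaustive list):
//   hwreg(<name>)            -- symbolic register, default offset and width
//   hwreg(<name>, 7, 25)     -- explicit bit offset and bitfield width
//   hwreg(6, 0, 32)          -- a numeric register id is also accepted
//   0xABCD                   -- raw 16-bit immediate fallback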
5104 if (trySkipId("hwreg", AsmToken::LParen)) { 5105 OperandInfoTy HwReg(ID_UNKNOWN_); 5106 int64_t Offset = OFFSET_DEFAULT_; 5107 int64_t Width = WIDTH_DEFAULT_; 5108 if (parseHwregBody(HwReg, Offset, Width) && 5109 validateHwreg(HwReg, Offset, Width, Loc)) { 5110 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5111 } 5112 } else if (parseExpr(ImmVal)) { 5113 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5114 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5115 } 5116 5117 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5118 return MatchOperand_Success; 5119 } 5120 5121 bool AMDGPUOperand::isHwreg() const { 5122 return isImmTy(ImmTyHwreg); 5123 } 5124 5125 //===----------------------------------------------------------------------===// 5126 // sendmsg 5127 //===----------------------------------------------------------------------===// 5128 5129 bool 5130 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5131 OperandInfoTy &Op, 5132 OperandInfoTy &Stream) { 5133 using namespace llvm::AMDGPU::SendMsg; 5134 5135 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5136 Msg.IsSymbolic = true; 5137 lex(); // skip message name 5138 } else if (!parseExpr(Msg.Id)) { 5139 return false; 5140 } 5141 5142 if (trySkipToken(AsmToken::Comma)) { 5143 Op.IsDefined = true; 5144 if (isToken(AsmToken::Identifier) && 5145 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5146 lex(); // skip operation name 5147 } else if (!parseExpr(Op.Id)) { 5148 return false; 5149 } 5150 5151 if (trySkipToken(AsmToken::Comma)) { 5152 Stream.IsDefined = true; 5153 if (!parseExpr(Stream.Id)) 5154 return false; 5155 } 5156 } 5157 5158 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5159 } 5160 5161 bool 5162 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5163 const OperandInfoTy &Op, 5164 const OperandInfoTy &Stream, 5165 const SMLoc S) { 5166 using namespace llvm::AMDGPU::SendMsg; 5167 5168 // Validation strictness depends on whether message is specified 5169 // in a symbolc or in a numeric form. In the latter case 5170 // only encoding possibility is checked. 5171 bool Strict = Msg.IsSymbolic; 5172 5173 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5174 Error(S, "invalid message id"); 5175 return false; 5176 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5177 Error(S, Op.IsDefined ? 5178 "message does not support operations" : 5179 "missing message operation"); 5180 return false; 5181 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5182 Error(S, "invalid operation id"); 5183 return false; 5184 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5185 Error(S, "message operation does not support streams"); 5186 return false; 5187 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5188 Error(S, "invalid message stream id"); 5189 return false; 5190 } 5191 return true; 5192 } 5193 5194 OperandMatchResultTy 5195 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5196 using namespace llvm::AMDGPU::SendMsg; 5197 5198 int64_t ImmVal = 0; 5199 SMLoc Loc = getLoc(); 5200 5201 // If parse failed, do not return error code 5202 // to avoid excessive error messages. 
5203 if (trySkipId("sendmsg", AsmToken::LParen)) { 5204 OperandInfoTy Msg(ID_UNKNOWN_); 5205 OperandInfoTy Op(OP_NONE_); 5206 OperandInfoTy Stream(STREAM_ID_NONE_); 5207 if (parseSendMsgBody(Msg, Op, Stream) && 5208 validateSendMsg(Msg, Op, Stream, Loc)) { 5209 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5210 } 5211 } else if (parseExpr(ImmVal)) { 5212 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5213 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5214 } 5215 5216 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5217 return MatchOperand_Success; 5218 } 5219 5220 bool AMDGPUOperand::isSendMsg() const { 5221 return isImmTy(ImmTySendMsg); 5222 } 5223 5224 //===----------------------------------------------------------------------===// 5225 // v_interp 5226 //===----------------------------------------------------------------------===// 5227 5228 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5229 if (getLexer().getKind() != AsmToken::Identifier) 5230 return MatchOperand_NoMatch; 5231 5232 StringRef Str = Parser.getTok().getString(); 5233 int Slot = StringSwitch<int>(Str) 5234 .Case("p10", 0) 5235 .Case("p20", 1) 5236 .Case("p0", 2) 5237 .Default(-1); 5238 5239 SMLoc S = Parser.getTok().getLoc(); 5240 if (Slot == -1) 5241 return MatchOperand_ParseFail; 5242 5243 Parser.Lex(); 5244 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5245 AMDGPUOperand::ImmTyInterpSlot)); 5246 return MatchOperand_Success; 5247 } 5248 5249 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5250 if (getLexer().getKind() != AsmToken::Identifier) 5251 return MatchOperand_NoMatch; 5252 5253 StringRef Str = Parser.getTok().getString(); 5254 if (!Str.startswith("attr")) 5255 return MatchOperand_NoMatch; 5256 5257 StringRef Chan = Str.take_back(2); 5258 int AttrChan = StringSwitch<int>(Chan) 5259 .Case(".x", 0) 5260 .Case(".y", 1) 5261 .Case(".z", 2) 5262 .Case(".w", 3) 5263 .Default(-1); 5264 if (AttrChan == -1) 5265 return MatchOperand_ParseFail; 5266 5267 Str = Str.drop_back(2).drop_front(4); 5268 5269 uint8_t Attr; 5270 if (Str.getAsInteger(10, Attr)) 5271 return MatchOperand_ParseFail; 5272 5273 SMLoc S = Parser.getTok().getLoc(); 5274 Parser.Lex(); 5275 if (Attr > 63) { 5276 Error(S, "out of bounds attr"); 5277 return MatchOperand_Success; 5278 } 5279 5280 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5281 5282 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5283 AMDGPUOperand::ImmTyInterpAttr)); 5284 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5285 AMDGPUOperand::ImmTyAttrChan)); 5286 return MatchOperand_Success; 5287 } 5288 5289 //===----------------------------------------------------------------------===// 5290 // exp 5291 //===----------------------------------------------------------------------===// 5292 5293 void AMDGPUAsmParser::errorExpTgt() { 5294 Error(Parser.getTok().getLoc(), "invalid exp target"); 5295 } 5296 5297 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5298 uint8_t &Val) { 5299 if (Str == "null") { 5300 Val = 9; 5301 return MatchOperand_Success; 5302 } 5303 5304 if (Str.startswith("mrt")) { 5305 Str = Str.drop_front(3); 5306 if (Str == "z") { // == mrtz 5307 Val = 8; 5308 return MatchOperand_Success; 5309 } 5310 5311 if (Str.getAsInteger(10, Val)) 5312 return MatchOperand_ParseFail; 5313 5314 if (Val > 7) 5315 errorExpTgt(); 5316 5317 return MatchOperand_Success; 5318 } 5319 5320 if (Str.startswith("pos")) 
{ 5321 Str = Str.drop_front(3); 5322 if (Str.getAsInteger(10, Val)) 5323 return MatchOperand_ParseFail; 5324 5325 if (Val > 4 || (Val == 4 && !isGFX10())) 5326 errorExpTgt(); 5327 5328 Val += 12; 5329 return MatchOperand_Success; 5330 } 5331 5332 if (isGFX10() && Str == "prim") { 5333 Val = 20; 5334 return MatchOperand_Success; 5335 } 5336 5337 if (Str.startswith("param")) { 5338 Str = Str.drop_front(5); 5339 if (Str.getAsInteger(10, Val)) 5340 return MatchOperand_ParseFail; 5341 5342 if (Val >= 32) 5343 errorExpTgt(); 5344 5345 Val += 32; 5346 return MatchOperand_Success; 5347 } 5348 5349 if (Str.startswith("invalid_target_")) { 5350 Str = Str.drop_front(15); 5351 if (Str.getAsInteger(10, Val)) 5352 return MatchOperand_ParseFail; 5353 5354 errorExpTgt(); 5355 return MatchOperand_Success; 5356 } 5357 5358 return MatchOperand_NoMatch; 5359 } 5360 5361 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5362 uint8_t Val; 5363 StringRef Str = Parser.getTok().getString(); 5364 5365 auto Res = parseExpTgtImpl(Str, Val); 5366 if (Res != MatchOperand_Success) 5367 return Res; 5368 5369 SMLoc S = Parser.getTok().getLoc(); 5370 Parser.Lex(); 5371 5372 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5373 AMDGPUOperand::ImmTyExpTgt)); 5374 return MatchOperand_Success; 5375 } 5376 5377 //===----------------------------------------------------------------------===// 5378 // parser helpers 5379 //===----------------------------------------------------------------------===// 5380 5381 bool 5382 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5383 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5384 } 5385 5386 bool 5387 AMDGPUAsmParser::isId(const StringRef Id) const { 5388 return isId(getToken(), Id); 5389 } 5390 5391 bool 5392 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5393 return getTokenKind() == Kind; 5394 } 5395 5396 bool 5397 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5398 if (isId(Id)) { 5399 lex(); 5400 return true; 5401 } 5402 return false; 5403 } 5404 5405 bool 5406 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5407 if (isId(Id) && peekToken().is(Kind)) { 5408 lex(); 5409 lex(); 5410 return true; 5411 } 5412 return false; 5413 } 5414 5415 bool 5416 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5417 if (isToken(Kind)) { 5418 lex(); 5419 return true; 5420 } 5421 return false; 5422 } 5423 5424 bool 5425 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5426 const StringRef ErrMsg) { 5427 if (!trySkipToken(Kind)) { 5428 Error(getLoc(), ErrMsg); 5429 return false; 5430 } 5431 return true; 5432 } 5433 5434 bool 5435 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5436 return !getParser().parseAbsoluteExpression(Imm); 5437 } 5438 5439 bool 5440 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5441 SMLoc S = getLoc(); 5442 5443 const MCExpr *Expr; 5444 if (Parser.parseExpression(Expr)) 5445 return false; 5446 5447 int64_t IntVal; 5448 if (Expr->evaluateAsAbsolute(IntVal)) { 5449 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5450 } else { 5451 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5452 } 5453 return true; 5454 } 5455 5456 bool 5457 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5458 if (isToken(AsmToken::String)) { 5459 Val = getToken().getStringContents(); 5460 lex(); 5461 return true; 5462 } else { 5463 Error(getLoc(), ErrMsg); 5464 return false; 5465 } 5466 } 5467 5468 
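// The accessors below are thin wrappers around MCAsmParser/MCAsmLexer.
// Keeping them in one place lets the custom operand parsers above remain
// independent of the underlying parser and lexer interfaces.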
AsmToken 5469 AMDGPUAsmParser::getToken() const { 5470 return Parser.getTok(); 5471 } 5472 5473 AsmToken 5474 AMDGPUAsmParser::peekToken() { 5475 return getLexer().peekTok(); 5476 } 5477 5478 void 5479 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5480 auto TokCount = getLexer().peekTokens(Tokens); 5481 5482 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5483 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5484 } 5485 5486 AsmToken::TokenKind 5487 AMDGPUAsmParser::getTokenKind() const { 5488 return getLexer().getKind(); 5489 } 5490 5491 SMLoc 5492 AMDGPUAsmParser::getLoc() const { 5493 return getToken().getLoc(); 5494 } 5495 5496 StringRef 5497 AMDGPUAsmParser::getTokenStr() const { 5498 return getToken().getString(); 5499 } 5500 5501 void 5502 AMDGPUAsmParser::lex() { 5503 Parser.Lex(); 5504 } 5505 5506 //===----------------------------------------------------------------------===// 5507 // swizzle 5508 //===----------------------------------------------------------------------===// 5509 5510 LLVM_READNONE 5511 static unsigned 5512 encodeBitmaskPerm(const unsigned AndMask, 5513 const unsigned OrMask, 5514 const unsigned XorMask) { 5515 using namespace llvm::AMDGPU::Swizzle; 5516 5517 return BITMASK_PERM_ENC | 5518 (AndMask << BITMASK_AND_SHIFT) | 5519 (OrMask << BITMASK_OR_SHIFT) | 5520 (XorMask << BITMASK_XOR_SHIFT); 5521 } 5522 5523 bool 5524 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5525 const unsigned MinVal, 5526 const unsigned MaxVal, 5527 const StringRef ErrMsg) { 5528 for (unsigned i = 0; i < OpNum; ++i) { 5529 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5530 return false; 5531 } 5532 SMLoc ExprLoc = Parser.getTok().getLoc(); 5533 if (!parseExpr(Op[i])) { 5534 return false; 5535 } 5536 if (Op[i] < MinVal || Op[i] > MaxVal) { 5537 Error(ExprLoc, ErrMsg); 5538 return false; 5539 } 5540 } 5541 5542 return true; 5543 } 5544 5545 bool 5546 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5547 using namespace llvm::AMDGPU::Swizzle; 5548 5549 int64_t Lane[LANE_NUM]; 5550 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5551 "expected a 2-bit lane id")) { 5552 Imm = QUAD_PERM_ENC; 5553 for (unsigned I = 0; I < LANE_NUM; ++I) { 5554 Imm |= Lane[I] << (LANE_SHIFT * I); 5555 } 5556 return true; 5557 } 5558 return false; 5559 } 5560 5561 bool 5562 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5563 using namespace llvm::AMDGPU::Swizzle; 5564 5565 SMLoc S = Parser.getTok().getLoc(); 5566 int64_t GroupSize; 5567 int64_t LaneIdx; 5568 5569 if (!parseSwizzleOperands(1, &GroupSize, 5570 2, 32, 5571 "group size must be in the interval [2,32]")) { 5572 return false; 5573 } 5574 if (!isPowerOf2_64(GroupSize)) { 5575 Error(S, "group size must be a power of two"); 5576 return false; 5577 } 5578 if (parseSwizzleOperands(1, &LaneIdx, 5579 0, GroupSize - 1, 5580 "lane id must be in the interval [0,group size - 1]")) { 5581 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5582 return true; 5583 } 5584 return false; 5585 } 5586 5587 bool 5588 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5589 using namespace llvm::AMDGPU::Swizzle; 5590 5591 SMLoc S = Parser.getTok().getLoc(); 5592 int64_t GroupSize; 5593 5594 if (!parseSwizzleOperands(1, &GroupSize, 5595 2, 32, "group size must be in the interval [2,32]")) { 5596 return false; 5597 } 5598 if (!isPowerOf2_64(GroupSize)) { 5599 Error(S, "group size must be a power of two"); 5600 return false; 5601 } 5602 5603 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5604 return true; 5605 } 5606 5607 bool 5608 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5609 using namespace llvm::AMDGPU::Swizzle; 5610 5611 SMLoc S = Parser.getTok().getLoc(); 5612 int64_t GroupSize; 5613 5614 if (!parseSwizzleOperands(1, &GroupSize, 5615 1, 16, "group size must be in the interval [1,16]")) { 5616 return false; 5617 } 5618 if (!isPowerOf2_64(GroupSize)) { 5619 Error(S, "group size must be a power of two"); 5620 return false; 5621 } 5622 5623 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5624 return true; 5625 } 5626 5627 bool 5628 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5629 using namespace llvm::AMDGPU::Swizzle; 5630 5631 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5632 return false; 5633 } 5634 5635 StringRef Ctl; 5636 SMLoc StrLoc = Parser.getTok().getLoc(); 5637 if (!parseString(Ctl)) { 5638 return false; 5639 } 5640 if (Ctl.size() != BITMASK_WIDTH) { 5641 Error(StrLoc, "expected a 5-character mask"); 5642 return false; 5643 } 5644 5645 unsigned AndMask = 0; 5646 unsigned OrMask = 0; 5647 unsigned XorMask = 0; 5648 5649 for (size_t i = 0; i < Ctl.size(); ++i) { 5650 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5651 switch(Ctl[i]) { 5652 default: 5653 Error(StrLoc, "invalid mask"); 5654 return false; 5655 case '0': 5656 break; 5657 case '1': 5658 OrMask |= Mask; 5659 break; 5660 case 'p': 5661 AndMask |= Mask; 5662 break; 5663 case 'i': 5664 AndMask |= Mask; 5665 XorMask |= Mask; 5666 break; 5667 } 5668 } 5669 5670 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5671 return true; 5672 } 5673 5674 bool 5675 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5676 5677 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5678 5679 if (!parseExpr(Imm)) { 5680 return false; 5681 } 5682 if (!isUInt<16>(Imm)) { 5683 Error(OffsetLoc, "expected a 16-bit offset"); 5684 return false; 5685 } 5686 return true; 5687 } 5688 5689 bool 5690 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5691 using namespace llvm::AMDGPU::Swizzle; 5692 5693 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5694 5695 SMLoc ModeLoc = Parser.getTok().getLoc(); 5696 bool Ok = false; 5697 5698 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5699 Ok = parseSwizzleQuadPerm(Imm); 5700 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5701 Ok = parseSwizzleBitmaskPerm(Imm); 5702 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5703 Ok = parseSwizzleBroadcast(Imm); 5704 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5705 Ok = parseSwizzleSwap(Imm); 5706 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5707 Ok = parseSwizzleReverse(Imm); 5708 } else { 5709 Error(ModeLoc, "expected a swizzle mode"); 5710 } 5711 5712 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5713 } 5714 5715 return false; 5716 } 5717 5718 OperandMatchResultTy 5719 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5720 SMLoc S = Parser.getTok().getLoc(); 5721 int64_t Imm = 0; 5722 5723 if (trySkipId("offset")) { 5724 5725 bool Ok = false; 5726 if (skipToken(AsmToken::Colon, "expected a colon")) { 5727 if (trySkipId("swizzle")) { 5728 Ok = parseSwizzleMacro(Imm); 5729 } else { 5730 Ok = parseSwizzleOffset(Imm); 5731 } 5732 } 5733 5734 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5735 5736 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5737 } else { 5738 // Swizzle "offset" operand is optional. 5739 // If it is omitted, try parsing other optional operands. 
5740 return parseOptionalOpr(Operands); 5741 } 5742 } 5743 5744 bool 5745 AMDGPUOperand::isSwizzle() const { 5746 return isImmTy(ImmTySwizzle); 5747 } 5748 5749 //===----------------------------------------------------------------------===// 5750 // VGPR Index Mode 5751 //===----------------------------------------------------------------------===// 5752 5753 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5754 5755 using namespace llvm::AMDGPU::VGPRIndexMode; 5756 5757 if (trySkipToken(AsmToken::RParen)) { 5758 return OFF; 5759 } 5760 5761 int64_t Imm = 0; 5762 5763 while (true) { 5764 unsigned Mode = 0; 5765 SMLoc S = Parser.getTok().getLoc(); 5766 5767 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5768 if (trySkipId(IdSymbolic[ModeId])) { 5769 Mode = 1 << ModeId; 5770 break; 5771 } 5772 } 5773 5774 if (Mode == 0) { 5775 Error(S, (Imm == 0)? 5776 "expected a VGPR index mode or a closing parenthesis" : 5777 "expected a VGPR index mode"); 5778 break; 5779 } 5780 5781 if (Imm & Mode) { 5782 Error(S, "duplicate VGPR index mode"); 5783 break; 5784 } 5785 Imm |= Mode; 5786 5787 if (trySkipToken(AsmToken::RParen)) 5788 break; 5789 if (!skipToken(AsmToken::Comma, 5790 "expected a comma or a closing parenthesis")) 5791 break; 5792 } 5793 5794 return Imm; 5795 } 5796 5797 OperandMatchResultTy 5798 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5799 5800 int64_t Imm = 0; 5801 SMLoc S = Parser.getTok().getLoc(); 5802 5803 if (getLexer().getKind() == AsmToken::Identifier && 5804 Parser.getTok().getString() == "gpr_idx" && 5805 getLexer().peekTok().is(AsmToken::LParen)) { 5806 5807 Parser.Lex(); 5808 Parser.Lex(); 5809 5810 // If parse failed, trigger an error but do not return error code 5811 // to avoid excessive error messages. 5812 Imm = parseGPRIdxMacro(); 5813 5814 } else { 5815 if (getParser().parseAbsoluteExpression(Imm)) 5816 return MatchOperand_NoMatch; 5817 if (Imm < 0 || !isUInt<4>(Imm)) { 5818 Error(S, "invalid immediate: only 4-bit values are legal"); 5819 } 5820 } 5821 5822 Operands.push_back( 5823 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5824 return MatchOperand_Success; 5825 } 5826 5827 bool AMDGPUOperand::isGPRIdxMode() const { 5828 return isImmTy(ImmTyGprIdxMode); 5829 } 5830 5831 //===----------------------------------------------------------------------===// 5832 // sopp branch targets 5833 //===----------------------------------------------------------------------===// 5834 5835 OperandMatchResultTy 5836 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5837 5838 // Make sure we are not parsing something 5839 // that looks like a label or an expression but is not. 5840 // This will improve error messages. 5841 if (isRegister() || isModifier()) 5842 return MatchOperand_NoMatch; 5843 5844 if (parseExpr(Operands)) { 5845 5846 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5847 assert(Opr.isImm() || Opr.isExpr()); 5848 SMLoc Loc = Opr.getStartLoc(); 5849 5850 // Currently we do not support arbitrary expressions as branch targets. 5851 // Only labels and absolute expressions are accepted. 
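    // For example (illustrative), "s_branch loop_end" and "s_branch 8" are
    // accepted, while a composite expression such as "loop_end + 4" is rejected
    // below unless it folds to an absolute value.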
5852 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5853 Error(Loc, "expected an absolute expression or a label"); 5854 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5855 Error(Loc, "expected a 16-bit signed jump offset"); 5856 } 5857 } 5858 5859 return MatchOperand_Success; // avoid excessive error messages 5860 } 5861 5862 //===----------------------------------------------------------------------===// 5863 // Boolean holding registers 5864 //===----------------------------------------------------------------------===// 5865 5866 OperandMatchResultTy 5867 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5868 return parseReg(Operands); 5869 } 5870 5871 //===----------------------------------------------------------------------===// 5872 // mubuf 5873 //===----------------------------------------------------------------------===// 5874 5875 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5876 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5877 } 5878 5879 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5880 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5881 } 5882 5883 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5884 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5885 } 5886 5887 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5888 const OperandVector &Operands, 5889 bool IsAtomic, 5890 bool IsAtomicReturn, 5891 bool IsLds) { 5892 bool IsLdsOpcode = IsLds; 5893 bool HasLdsModifier = false; 5894 OptionalImmIndexMap OptionalIdx; 5895 assert(IsAtomicReturn ? IsAtomic : true); 5896 unsigned FirstOperandIdx = 1; 5897 5898 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5899 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5900 5901 // Add the register arguments 5902 if (Op.isReg()) { 5903 Op.addRegOperands(Inst, 1); 5904 // Insert a tied src for atomic return dst. 5905 // This cannot be postponed as subsequent calls to 5906 // addImmOperands rely on correct number of MC operands. 5907 if (IsAtomicReturn && i == FirstOperandIdx) 5908 Op.addRegOperands(Inst, 1); 5909 continue; 5910 } 5911 5912 // Handle the case where soffset is an immediate 5913 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5914 Op.addImmOperands(Inst, 1); 5915 continue; 5916 } 5917 5918 HasLdsModifier |= Op.isLDS(); 5919 5920 // Handle tokens like 'offen' which are sometimes hard-coded into the 5921 // asm string. There are no MCInst operands for these. 5922 if (Op.isToken()) { 5923 continue; 5924 } 5925 assert(Op.isImm()); 5926 5927 // Handle optional arguments 5928 OptionalIdx[Op.getImmTy()] = i; 5929 } 5930 5931 // This is a workaround for an llvm quirk which may result in an 5932 // incorrect instruction selection. Lds and non-lds versions of 5933 // MUBUF instructions are identical except that lds versions 5934 // have mandatory 'lds' modifier. However this modifier follows 5935 // optional modifiers and llvm asm matcher regards this 'lds' 5936 // modifier as an optional one. As a result, an lds version 5937 // of opcode may be selected even if it has no 'lds' modifier. 5938 if (IsLdsOpcode && !HasLdsModifier) { 5939 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5940 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5941 Inst.setOpcode(NoLdsOpcode); 5942 IsLdsOpcode = false; 5943 } 5944 } 5945 5946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5947 if (!IsAtomic) { // glc is hard-coded. 
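    // Atomic opcodes carry glc implicitly (the "return" variants hard-code it
    // in the instruction), so only non-atomic forms take glc as an optional
    // operand here.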
5948 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5949 } 5950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5951 5952 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5953 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5954 } 5955 5956 if (isGFX10()) 5957 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5958 } 5959 5960 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5961 OptionalImmIndexMap OptionalIdx; 5962 5963 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5964 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5965 5966 // Add the register arguments 5967 if (Op.isReg()) { 5968 Op.addRegOperands(Inst, 1); 5969 continue; 5970 } 5971 5972 // Handle the case where soffset is an immediate 5973 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5974 Op.addImmOperands(Inst, 1); 5975 continue; 5976 } 5977 5978 // Handle tokens like 'offen' which are sometimes hard-coded into the 5979 // asm string. There are no MCInst operands for these. 5980 if (Op.isToken()) { 5981 continue; 5982 } 5983 assert(Op.isImm()); 5984 5985 // Handle optional arguments 5986 OptionalIdx[Op.getImmTy()] = i; 5987 } 5988 5989 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5990 AMDGPUOperand::ImmTyOffset); 5991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5992 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5995 5996 if (isGFX10()) 5997 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5998 } 5999 6000 //===----------------------------------------------------------------------===// 6001 // mimg 6002 //===----------------------------------------------------------------------===// 6003 6004 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6005 bool IsAtomic) { 6006 unsigned I = 1; 6007 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6008 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6009 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6010 } 6011 6012 if (IsAtomic) { 6013 // Add src, same as dst 6014 assert(Desc.getNumDefs() == 1); 6015 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6016 } 6017 6018 OptionalImmIndexMap OptionalIdx; 6019 6020 for (unsigned E = Operands.size(); I != E; ++I) { 6021 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6022 6023 // Add the register arguments 6024 if (Op.isReg()) { 6025 Op.addRegOperands(Inst, 1); 6026 } else if (Op.isImmModifier()) { 6027 OptionalIdx[Op.getImmTy()] = I; 6028 } else if (!Op.isToken()) { 6029 llvm_unreachable("unexpected operand type"); 6030 } 6031 } 6032 6033 bool IsGFX10 = isGFX10(); 6034 6035 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6036 if (IsGFX10) 6037 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6038 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6039 if (IsGFX10) 6040 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6041 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6042 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6043 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6044 if (IsGFX10) 6045 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6046 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6047 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6048 if (!IsGFX10) 6049 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6050 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6051 } 6052 6053 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6054 cvtMIMG(Inst, Operands, true); 6055 } 6056 6057 //===----------------------------------------------------------------------===// 6058 // smrd 6059 //===----------------------------------------------------------------------===// 6060 6061 bool AMDGPUOperand::isSMRDOffset8() const { 6062 return isImm() && isUInt<8>(getImm()); 6063 } 6064 6065 bool AMDGPUOperand::isSMRDOffset20() const { 6066 return isImm() && isUInt<20>(getImm()); 6067 } 6068 6069 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6070 // 32-bit literals are only supported on CI and we only want to use them 6071 // when the offset is > 8-bits. 6072 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6073 } 6074 6075 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6076 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6077 } 6078 6079 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 6080 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6081 } 6082 6083 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6084 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6085 } 6086 6087 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6088 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6089 } 6090 6091 //===----------------------------------------------------------------------===// 6092 // vop3 6093 //===----------------------------------------------------------------------===// 6094 6095 static bool ConvertOmodMul(int64_t &Mul) { 6096 if (Mul != 1 && Mul != 2 && Mul != 4) 6097 return false; 6098 6099 Mul >>= 1; 6100 return true; 6101 } 6102 6103 static bool ConvertOmodDiv(int64_t &Div) { 6104 if (Div == 1) { 6105 Div = 0; 6106 return true; 6107 } 6108 6109 if (Div == 2) { 6110 Div = 3; 6111 return true; 6112 } 6113 6114 return false; 6115 } 6116 6117 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6118 if (BoundCtrl == 0) { 6119 BoundCtrl = 1; 6120 return true; 6121 } 6122 6123 if (BoundCtrl == -1) { 6124 BoundCtrl = 0; 6125 return true; 6126 } 6127 6128 return false; 6129 } 6130 6131 // Note: the order in this table matches the order of operands in AsmString. 
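// Each entry is { name, immediate type, IsBit flag, optional converter };
// parseOptionalOpr() below walks this table in order when trying to match an
// optional operand.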
6132 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6133 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6134 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6135 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6136 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6137 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6138 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6139 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6140 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6141 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6142 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6143 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6144 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6145 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6146 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6147 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6148 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6149 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6150 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6151 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6152 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6153 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6154 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6155 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6156 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6157 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6158 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6159 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6160 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6161 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6162 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6163 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6164 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6165 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6166 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6167 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6168 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6169 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6170 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6171 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6172 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6173 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6174 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6175 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6176 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6177 }; 6178 6179 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6180 6181 OperandMatchResultTy res = parseOptionalOpr(Operands); 6182 6183 // This is a hack to enable hardcoded mandatory operands which follow 6184 // optional operands. 6185 // 6186 // Current design assumes that all operands after the first optional operand 6187 // are also optional. However implementation of some instructions violates 6188 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6189 // 6190 // To alleviate this problem, we have to (implicitly) parse extra operands 6191 // to make sure autogenerated parser of custom operands never hit hardcoded 6192 // mandatory operands. 
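  // For instance (illustrative), the "return" forms of flat/global atomics end
  // in a hard-coded glc token that appears after optional operands such as
  // offset:.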
6193 6194 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6195 if (res != MatchOperand_Success || 6196 isToken(AsmToken::EndOfStatement)) 6197 break; 6198 6199 trySkipToken(AsmToken::Comma); 6200 res = parseOptionalOpr(Operands); 6201 } 6202 6203 return res; 6204 } 6205 6206 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6207 OperandMatchResultTy res; 6208 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6209 // try to parse any optional operand here 6210 if (Op.IsBit) { 6211 res = parseNamedBit(Op.Name, Operands, Op.Type); 6212 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6213 res = parseOModOperand(Operands); 6214 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6215 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6216 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6217 res = parseSDWASel(Operands, Op.Name, Op.Type); 6218 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6219 res = parseSDWADstUnused(Operands); 6220 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6221 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6222 Op.Type == AMDGPUOperand::ImmTyNegLo || 6223 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6224 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6225 Op.ConvertResult); 6226 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6227 res = parseDim(Operands); 6228 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6229 res = parseDfmtNfmt(Operands); 6230 } else { 6231 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6232 } 6233 if (res != MatchOperand_NoMatch) { 6234 return res; 6235 } 6236 } 6237 return MatchOperand_NoMatch; 6238 } 6239 6240 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6241 StringRef Name = Parser.getTok().getString(); 6242 if (Name == "mul") { 6243 return parseIntWithPrefix("mul", Operands, 6244 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6245 } 6246 6247 if (Name == "div") { 6248 return parseIntWithPrefix("div", Operands, 6249 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6250 } 6251 6252 return MatchOperand_NoMatch; 6253 } 6254 6255 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6256 cvtVOP3P(Inst, Operands); 6257 6258 int Opc = Inst.getOpcode(); 6259 6260 int SrcNum; 6261 const int Ops[] = { AMDGPU::OpName::src0, 6262 AMDGPU::OpName::src1, 6263 AMDGPU::OpName::src2 }; 6264 for (SrcNum = 0; 6265 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6266 ++SrcNum); 6267 assert(SrcNum > 0); 6268 6269 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6270 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6271 6272 if ((OpSel & (1 << SrcNum)) != 0) { 6273 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6274 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6275 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6276 } 6277 } 6278 6279 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6280 // 1. This operand is input modifiers 6281 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6282 // 2. This is not last operand 6283 && Desc.NumOperands > (OpNum + 1) 6284 // 3. Next operand is register class 6285 && Desc.OpInfo[OpNum + 1].RegClass != -1 6286 // 4. 
Next register is not tied to any other operand 6287 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6288 } 6289 6290 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6291 { 6292 OptionalImmIndexMap OptionalIdx; 6293 unsigned Opc = Inst.getOpcode(); 6294 6295 unsigned I = 1; 6296 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6297 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6298 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6299 } 6300 6301 for (unsigned E = Operands.size(); I != E; ++I) { 6302 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6303 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6304 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6305 } else if (Op.isInterpSlot() || 6306 Op.isInterpAttr() || 6307 Op.isAttrChan()) { 6308 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6309 } else if (Op.isImmModifier()) { 6310 OptionalIdx[Op.getImmTy()] = I; 6311 } else { 6312 llvm_unreachable("unhandled operand type"); 6313 } 6314 } 6315 6316 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6317 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6318 } 6319 6320 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6321 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6322 } 6323 6324 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6325 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6326 } 6327 } 6328 6329 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6330 OptionalImmIndexMap &OptionalIdx) { 6331 unsigned Opc = Inst.getOpcode(); 6332 6333 unsigned I = 1; 6334 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6335 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6336 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6337 } 6338 6339 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6340 // This instruction has src modifiers 6341 for (unsigned E = Operands.size(); I != E; ++I) { 6342 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6343 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6344 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6345 } else if (Op.isImmModifier()) { 6346 OptionalIdx[Op.getImmTy()] = I; 6347 } else if (Op.isRegOrImm()) { 6348 Op.addRegOrImmOperands(Inst, 1); 6349 } else { 6350 llvm_unreachable("unhandled operand type"); 6351 } 6352 } 6353 } else { 6354 // No src modifiers 6355 for (unsigned E = Operands.size(); I != E; ++I) { 6356 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6357 if (Op.isMod()) { 6358 OptionalIdx[Op.getImmTy()] = I; 6359 } else { 6360 Op.addRegOrImmOperands(Inst, 1); 6361 } 6362 } 6363 } 6364 6365 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6366 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6367 } 6368 6369 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6370 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6371 } 6372 6373 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6374 // it has src2 register operand that is tied to dst operand 6375 // we don't allow modifiers for this operand in assembler so src2_modifiers 6376 // should be 0. 
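  // E.g. (illustrative) "v_mac_f32_e64 v0, v1, v2": only dst, src0 and src1 are
  // written by the user; src2 is re-added below as a copy of dst together with
  // a zero src2_modifiers operand.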
6377 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6378 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6379 Opc == AMDGPU::V_MAC_F32_e64_vi || 6380 Opc == AMDGPU::V_MAC_F16_e64_vi || 6381 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6382 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6383 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6384 auto it = Inst.begin(); 6385 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6386 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6387 ++it; 6388 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6389 } 6390 } 6391 6392 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6393 OptionalImmIndexMap OptionalIdx; 6394 cvtVOP3(Inst, Operands, OptionalIdx); 6395 } 6396 6397 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6398 const OperandVector &Operands) { 6399 OptionalImmIndexMap OptIdx; 6400 const int Opc = Inst.getOpcode(); 6401 const MCInstrDesc &Desc = MII.get(Opc); 6402 6403 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6404 6405 cvtVOP3(Inst, Operands, OptIdx); 6406 6407 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6408 assert(!IsPacked); 6409 Inst.addOperand(Inst.getOperand(0)); 6410 } 6411 6412 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6413 // instruction, and then figure out where to actually put the modifiers 6414 6415 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6416 6417 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6418 if (OpSelHiIdx != -1) { 6419 int DefaultVal = IsPacked ? -1 : 0; 6420 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6421 DefaultVal); 6422 } 6423 6424 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6425 if (NegLoIdx != -1) { 6426 assert(IsPacked); 6427 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6428 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6429 } 6430 6431 const int Ops[] = { AMDGPU::OpName::src0, 6432 AMDGPU::OpName::src1, 6433 AMDGPU::OpName::src2 }; 6434 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6435 AMDGPU::OpName::src1_modifiers, 6436 AMDGPU::OpName::src2_modifiers }; 6437 6438 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6439 6440 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6441 unsigned OpSelHi = 0; 6442 unsigned NegLo = 0; 6443 unsigned NegHi = 0; 6444 6445 if (OpSelHiIdx != -1) { 6446 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6447 } 6448 6449 if (NegLoIdx != -1) { 6450 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6451 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6452 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6453 } 6454 6455 for (int J = 0; J < 3; ++J) { 6456 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6457 if (OpIdx == -1) 6458 break; 6459 6460 uint32_t ModVal = 0; 6461 6462 if ((OpSel & (1 << J)) != 0) 6463 ModVal |= SISrcMods::OP_SEL_0; 6464 6465 if ((OpSelHi & (1 << J)) != 0) 6466 ModVal |= SISrcMods::OP_SEL_1; 6467 6468 if ((NegLo & (1 << J)) != 0) 6469 ModVal |= SISrcMods::NEG; 6470 6471 if ((NegHi & (1 << J)) != 0) 6472 ModVal |= SISrcMods::NEG_HI; 6473 6474 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6475 6476 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6477 } 6478 } 6479 6480 //===----------------------------------------------------------------------===// 6481 // dpp 6482 
//===----------------------------------------------------------------------===// 6483 6484 bool AMDGPUOperand::isDPP8() const { 6485 return isImmTy(ImmTyDPP8); 6486 } 6487 6488 bool AMDGPUOperand::isDPPCtrl() const { 6489 using namespace AMDGPU::DPP; 6490 6491 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6492 if (result) { 6493 int64_t Imm = getImm(); 6494 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6495 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6496 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6497 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6498 (Imm == DppCtrl::WAVE_SHL1) || 6499 (Imm == DppCtrl::WAVE_ROL1) || 6500 (Imm == DppCtrl::WAVE_SHR1) || 6501 (Imm == DppCtrl::WAVE_ROR1) || 6502 (Imm == DppCtrl::ROW_MIRROR) || 6503 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6504 (Imm == DppCtrl::BCAST15) || 6505 (Imm == DppCtrl::BCAST31) || 6506 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6507 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6508 } 6509 return false; 6510 } 6511 6512 //===----------------------------------------------------------------------===// 6513 // mAI 6514 //===----------------------------------------------------------------------===// 6515 6516 bool AMDGPUOperand::isBLGP() const { 6517 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6518 } 6519 6520 bool AMDGPUOperand::isCBSZ() const { 6521 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6522 } 6523 6524 bool AMDGPUOperand::isABID() const { 6525 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6526 } 6527 6528 bool AMDGPUOperand::isS16Imm() const { 6529 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6530 } 6531 6532 bool AMDGPUOperand::isU16Imm() const { 6533 return isImm() && isUInt<16>(getImm()); 6534 } 6535 6536 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6537 if (!isGFX10()) 6538 return MatchOperand_NoMatch; 6539 6540 SMLoc S = Parser.getTok().getLoc(); 6541 6542 if (getLexer().isNot(AsmToken::Identifier)) 6543 return MatchOperand_NoMatch; 6544 if (getLexer().getTok().getString() != "dim") 6545 return MatchOperand_NoMatch; 6546 6547 Parser.Lex(); 6548 if (getLexer().isNot(AsmToken::Colon)) 6549 return MatchOperand_ParseFail; 6550 6551 Parser.Lex(); 6552 6553 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6554 // integer. 
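  // E.g. (illustrative) "dim:SQ_RSRC_IMG_2D" and the short form "dim:2D" should
  // resolve to the same encoding once the SQ_RSRC_IMG_ prefix is stripped.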
6555 std::string Token; 6556 if (getLexer().is(AsmToken::Integer)) { 6557 SMLoc Loc = getLexer().getTok().getEndLoc(); 6558 Token = std::string(getLexer().getTok().getString()); 6559 Parser.Lex(); 6560 if (getLexer().getTok().getLoc() != Loc) 6561 return MatchOperand_ParseFail; 6562 } 6563 if (getLexer().isNot(AsmToken::Identifier)) 6564 return MatchOperand_ParseFail; 6565 Token += getLexer().getTok().getString(); 6566 6567 StringRef DimId = Token; 6568 if (DimId.startswith("SQ_RSRC_IMG_")) 6569 DimId = DimId.substr(12); 6570 6571 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6572 if (!DimInfo) 6573 return MatchOperand_ParseFail; 6574 6575 Parser.Lex(); 6576 6577 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6578 AMDGPUOperand::ImmTyDim)); 6579 return MatchOperand_Success; 6580 } 6581 6582 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6583 SMLoc S = Parser.getTok().getLoc(); 6584 StringRef Prefix; 6585 6586 if (getLexer().getKind() == AsmToken::Identifier) { 6587 Prefix = Parser.getTok().getString(); 6588 } else { 6589 return MatchOperand_NoMatch; 6590 } 6591 6592 if (Prefix != "dpp8") 6593 return parseDPPCtrl(Operands); 6594 if (!isGFX10()) 6595 return MatchOperand_NoMatch; 6596 6597 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6598 6599 int64_t Sels[8]; 6600 6601 Parser.Lex(); 6602 if (getLexer().isNot(AsmToken::Colon)) 6603 return MatchOperand_ParseFail; 6604 6605 Parser.Lex(); 6606 if (getLexer().isNot(AsmToken::LBrac)) 6607 return MatchOperand_ParseFail; 6608 6609 Parser.Lex(); 6610 if (getParser().parseAbsoluteExpression(Sels[0])) 6611 return MatchOperand_ParseFail; 6612 if (0 > Sels[0] || 7 < Sels[0]) 6613 return MatchOperand_ParseFail; 6614 6615 for (size_t i = 1; i < 8; ++i) { 6616 if (getLexer().isNot(AsmToken::Comma)) 6617 return MatchOperand_ParseFail; 6618 6619 Parser.Lex(); 6620 if (getParser().parseAbsoluteExpression(Sels[i])) 6621 return MatchOperand_ParseFail; 6622 if (0 > Sels[i] || 7 < Sels[i]) 6623 return MatchOperand_ParseFail; 6624 } 6625 6626 if (getLexer().isNot(AsmToken::RBrac)) 6627 return MatchOperand_ParseFail; 6628 Parser.Lex(); 6629 6630 unsigned DPP8 = 0; 6631 for (size_t i = 0; i < 8; ++i) 6632 DPP8 |= (Sels[i] << (i * 3)); 6633 6634 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6635 return MatchOperand_Success; 6636 } 6637 6638 OperandMatchResultTy 6639 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6640 using namespace AMDGPU::DPP; 6641 6642 SMLoc S = Parser.getTok().getLoc(); 6643 StringRef Prefix; 6644 int64_t Int; 6645 6646 if (getLexer().getKind() == AsmToken::Identifier) { 6647 Prefix = Parser.getTok().getString(); 6648 } else { 6649 return MatchOperand_NoMatch; 6650 } 6651 6652 if (Prefix == "row_mirror") { 6653 Int = DppCtrl::ROW_MIRROR; 6654 Parser.Lex(); 6655 } else if (Prefix == "row_half_mirror") { 6656 Int = DppCtrl::ROW_HALF_MIRROR; 6657 Parser.Lex(); 6658 } else { 6659 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6660 if (Prefix != "quad_perm" 6661 && Prefix != "row_shl" 6662 && Prefix != "row_shr" 6663 && Prefix != "row_ror" 6664 && Prefix != "wave_shl" 6665 && Prefix != "wave_rol" 6666 && Prefix != "wave_shr" 6667 && Prefix != "wave_ror" 6668 && Prefix != "row_bcast" 6669 && Prefix != "row_share" 6670 && Prefix != "row_xmask") { 6671 return MatchOperand_NoMatch; 6672 } 6673 6674 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6675 return MatchOperand_NoMatch; 6676 6677 if 
(!isVI() && !isGFX9() && 6678 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6679 Prefix == "wave_rol" || Prefix == "wave_ror" || 6680 Prefix == "row_bcast")) 6681 return MatchOperand_NoMatch; 6682 6683 Parser.Lex(); 6684 if (getLexer().isNot(AsmToken::Colon)) 6685 return MatchOperand_ParseFail; 6686 6687 if (Prefix == "quad_perm") { 6688 // quad_perm:[%d,%d,%d,%d] 6689 Parser.Lex(); 6690 if (getLexer().isNot(AsmToken::LBrac)) 6691 return MatchOperand_ParseFail; 6692 Parser.Lex(); 6693 6694 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6695 return MatchOperand_ParseFail; 6696 6697 for (int i = 0; i < 3; ++i) { 6698 if (getLexer().isNot(AsmToken::Comma)) 6699 return MatchOperand_ParseFail; 6700 Parser.Lex(); 6701 6702 int64_t Temp; 6703 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6704 return MatchOperand_ParseFail; 6705 const int shift = i*2 + 2; 6706 Int += (Temp << shift); 6707 } 6708 6709 if (getLexer().isNot(AsmToken::RBrac)) 6710 return MatchOperand_ParseFail; 6711 Parser.Lex(); 6712 } else { 6713 // sel:%d 6714 Parser.Lex(); 6715 if (getParser().parseAbsoluteExpression(Int)) 6716 return MatchOperand_ParseFail; 6717 6718 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6719 Int |= DppCtrl::ROW_SHL0; 6720 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6721 Int |= DppCtrl::ROW_SHR0; 6722 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6723 Int |= DppCtrl::ROW_ROR0; 6724 } else if (Prefix == "wave_shl" && 1 == Int) { 6725 Int = DppCtrl::WAVE_SHL1; 6726 } else if (Prefix == "wave_rol" && 1 == Int) { 6727 Int = DppCtrl::WAVE_ROL1; 6728 } else if (Prefix == "wave_shr" && 1 == Int) { 6729 Int = DppCtrl::WAVE_SHR1; 6730 } else if (Prefix == "wave_ror" && 1 == Int) { 6731 Int = DppCtrl::WAVE_ROR1; 6732 } else if (Prefix == "row_bcast") { 6733 if (Int == 15) { 6734 Int = DppCtrl::BCAST15; 6735 } else if (Int == 31) { 6736 Int = DppCtrl::BCAST31; 6737 } else { 6738 return MatchOperand_ParseFail; 6739 } 6740 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6741 Int |= DppCtrl::ROW_SHARE_FIRST; 6742 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6743 Int |= DppCtrl::ROW_XMASK_FIRST; 6744 } else { 6745 return MatchOperand_ParseFail; 6746 } 6747 } 6748 } 6749 6750 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6751 return MatchOperand_Success; 6752 } 6753 6754 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6755 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6756 } 6757 6758 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6759 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6760 } 6761 6762 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6763 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6764 } 6765 6766 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6767 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6768 } 6769 6770 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6771 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6772 } 6773 6774 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6775 OptionalImmIndexMap OptionalIdx; 6776 6777 unsigned I = 1; 6778 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6779 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6780 
((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6781 } 6782 6783 int Fi = 0; 6784 for (unsigned E = Operands.size(); I != E; ++I) { 6785 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6786 MCOI::TIED_TO); 6787 if (TiedTo != -1) { 6788 assert((unsigned)TiedTo < Inst.getNumOperands()); 6789 // handle tied old or src2 for MAC instructions 6790 Inst.addOperand(Inst.getOperand(TiedTo)); 6791 } 6792 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6793 // Add the register arguments 6794 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6795 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6796 // Skip it. 6797 continue; 6798 } 6799 6800 if (IsDPP8) { 6801 if (Op.isDPP8()) { 6802 Op.addImmOperands(Inst, 1); 6803 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6804 Op.addRegWithFPInputModsOperands(Inst, 2); 6805 } else if (Op.isFI()) { 6806 Fi = Op.getImm(); 6807 } else if (Op.isReg()) { 6808 Op.addRegOperands(Inst, 1); 6809 } else { 6810 llvm_unreachable("Invalid operand type"); 6811 } 6812 } else { 6813 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6814 Op.addRegWithFPInputModsOperands(Inst, 2); 6815 } else if (Op.isDPPCtrl()) { 6816 Op.addImmOperands(Inst, 1); 6817 } else if (Op.isImm()) { 6818 // Handle optional arguments 6819 OptionalIdx[Op.getImmTy()] = I; 6820 } else { 6821 llvm_unreachable("Invalid operand type"); 6822 } 6823 } 6824 } 6825 6826 if (IsDPP8) { 6827 using namespace llvm::AMDGPU::DPP; 6828 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6829 } else { 6830 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6831 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6832 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6833 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6835 } 6836 } 6837 } 6838 6839 //===----------------------------------------------------------------------===// 6840 // sdwa 6841 //===----------------------------------------------------------------------===// 6842 6843 OperandMatchResultTy 6844 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6845 AMDGPUOperand::ImmTy Type) { 6846 using namespace llvm::AMDGPU::SDWA; 6847 6848 SMLoc S = Parser.getTok().getLoc(); 6849 StringRef Value; 6850 OperandMatchResultTy res; 6851 6852 res = parseStringWithPrefix(Prefix, Value); 6853 if (res != MatchOperand_Success) { 6854 return res; 6855 } 6856 6857 int64_t Int; 6858 Int = StringSwitch<int64_t>(Value) 6859 .Case("BYTE_0", SdwaSel::BYTE_0) 6860 .Case("BYTE_1", SdwaSel::BYTE_1) 6861 .Case("BYTE_2", SdwaSel::BYTE_2) 6862 .Case("BYTE_3", SdwaSel::BYTE_3) 6863 .Case("WORD_0", SdwaSel::WORD_0) 6864 .Case("WORD_1", SdwaSel::WORD_1) 6865 .Case("DWORD", SdwaSel::DWORD) 6866 .Default(0xffffffff); 6867 Parser.Lex(); // eat last token 6868 6869 if (Int == 0xffffffff) { 6870 return MatchOperand_ParseFail; 6871 } 6872 6873 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6874 return MatchOperand_Success; 6875 } 6876 6877 OperandMatchResultTy 6878 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6879 using namespace llvm::AMDGPU::SDWA; 6880 6881 SMLoc S = Parser.getTok().getLoc(); 6882 StringRef Value; 6883 OperandMatchResultTy res; 6884 6885 res = parseStringWithPrefix("dst_unused", Value); 6886 if (res != 
MatchOperand_Success) { 6887 return res; 6888 } 6889 6890 int64_t Int; 6891 Int = StringSwitch<int64_t>(Value) 6892 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6893 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6894 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6895 .Default(0xffffffff); 6896 Parser.Lex(); // eat last token 6897 6898 if (Int == 0xffffffff) { 6899 return MatchOperand_ParseFail; 6900 } 6901 6902 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6903 return MatchOperand_Success; 6904 } 6905 6906 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6907 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6908 } 6909 6910 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6911 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6912 } 6913 6914 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6915 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 6916 } 6917 6918 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 6919 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 6920 } 6921 6922 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6923 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6924 } 6925 6926 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6927 uint64_t BasicInstType, 6928 bool SkipDstVcc, 6929 bool SkipSrcVcc) { 6930 using namespace llvm::AMDGPU::SDWA; 6931 6932 OptionalImmIndexMap OptionalIdx; 6933 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 6934 bool SkippedVcc = false; 6935 6936 unsigned I = 1; 6937 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6938 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6939 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6940 } 6941 6942 for (unsigned E = Operands.size(); I != E; ++I) { 6943 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6944 if (SkipVcc && !SkippedVcc && Op.isReg() && 6945 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6946 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6947 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6948 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6949 // Skip VCC only if we didn't skip it on previous iteration. 6950 // Note that src0 and src1 occupy 2 slots each because of modifiers. 
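      // Hence the operand count checks below: 1 MCOperand so far (just dst)
      // means we are looking at the vcc written right after dst; 5 (dst plus two
      // modifier/register pairs) means we are at the vcc carry-in after src1.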
6951 if (BasicInstType == SIInstrFlags::VOP2 && 6952 ((SkipDstVcc && Inst.getNumOperands() == 1) || 6953 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 6954 SkippedVcc = true; 6955 continue; 6956 } else if (BasicInstType == SIInstrFlags::VOPC && 6957 Inst.getNumOperands() == 0) { 6958 SkippedVcc = true; 6959 continue; 6960 } 6961 } 6962 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6963 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6964 } else if (Op.isImm()) { 6965 // Handle optional arguments 6966 OptionalIdx[Op.getImmTy()] = I; 6967 } else { 6968 llvm_unreachable("Invalid operand type"); 6969 } 6970 SkippedVcc = false; 6971 } 6972 6973 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6974 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6975 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6976 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6977 switch (BasicInstType) { 6978 case SIInstrFlags::VOP1: 6979 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6980 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6981 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6982 } 6983 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6984 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6985 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6986 break; 6987 6988 case SIInstrFlags::VOP2: 6989 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6990 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6991 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6992 } 6993 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6994 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6995 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6996 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6997 break; 6998 6999 case SIInstrFlags::VOPC: 7000 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7001 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7002 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7003 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7004 break; 7005 7006 default: 7007 llvm_unreachable("Invalid instruction type. 
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
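// For example (illustrative, not from a test): in
//   buffer_load_dword v0, off, s[0:3], 0 glc
// the "glc" token is parsed as an immediate operand of type ImmTyGLC, and
// validateTargetOperandClass() lets it satisfy the MCK_glc token class that
// the generated matcher expects.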
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // are expected to produce the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register, but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if it is not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }