1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? 
SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    mutable ImmKindTy Kind;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
232 return isSymbolRefExpr(); 233 } 234 235 bool isSymbolRefExpr() const { 236 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 237 } 238 239 bool isImm() const override { 240 return Kind == Immediate; 241 } 242 243 void setImmKindNone() const { 244 assert(isImm()); 245 Imm.Kind = ImmKindTyNone; 246 } 247 248 void setImmKindLiteral() const { 249 assert(isImm()); 250 Imm.Kind = ImmKindTyLiteral; 251 } 252 253 void setImmKindConst() const { 254 assert(isImm()); 255 Imm.Kind = ImmKindTyConst; 256 } 257 258 bool IsImmKindLiteral() const { 259 return isImm() && Imm.Kind == ImmKindTyLiteral; 260 } 261 262 bool isImmKindConst() const { 263 return isImm() && Imm.Kind == ImmKindTyConst; 264 } 265 266 bool isInlinableImm(MVT type) const; 267 bool isLiteralImm(MVT type) const; 268 269 bool isRegKind() const { 270 return Kind == Register; 271 } 272 273 bool isReg() const override { 274 return isRegKind() && !hasModifiers(); 275 } 276 277 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 278 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 279 } 280 281 bool isRegOrImmWithInt16InputMods() const { 282 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 283 } 284 285 bool isRegOrImmWithInt32InputMods() const { 286 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 287 } 288 289 bool isRegOrImmWithInt64InputMods() const { 290 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 291 } 292 293 bool isRegOrImmWithFP16InputMods() const { 294 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 295 } 296 297 bool isRegOrImmWithFP32InputMods() const { 298 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 299 } 300 301 bool isRegOrImmWithFP64InputMods() const { 302 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 303 } 304 305 bool isVReg() const { 306 return isRegClass(AMDGPU::VGPR_32RegClassID) || 307 isRegClass(AMDGPU::VReg_64RegClassID) || 308 isRegClass(AMDGPU::VReg_96RegClassID) || 309 isRegClass(AMDGPU::VReg_128RegClassID) || 310 isRegClass(AMDGPU::VReg_160RegClassID) || 311 isRegClass(AMDGPU::VReg_192RegClassID) || 312 isRegClass(AMDGPU::VReg_256RegClassID) || 313 isRegClass(AMDGPU::VReg_512RegClassID) || 314 isRegClass(AMDGPU::VReg_1024RegClassID); 315 } 316 317 bool isVReg32() const { 318 return isRegClass(AMDGPU::VGPR_32RegClassID); 319 } 320 321 bool isVReg32OrOff() const { 322 return isOff() || isVReg32(); 323 } 324 325 bool isNull() const { 326 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 327 } 328 329 bool isSDWAOperand(MVT type) const; 330 bool isSDWAFP16Operand() const; 331 bool isSDWAFP32Operand() const; 332 bool isSDWAInt16Operand() const; 333 bool isSDWAInt32Operand() const; 334 335 bool isImmTy(ImmTy ImmT) const { 336 return isImm() && Imm.Type == ImmT; 337 } 338 339 bool isImmModifier() const { 340 return isImm() && Imm.Type != ImmTyNone; 341 } 342 343 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 344 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 345 bool isDMask() const { return isImmTy(ImmTyDMask); } 346 bool isDim() const { return isImmTy(ImmTyDim); } 347 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 348 bool isDA() const { return isImmTy(ImmTyDA); } 349 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 350 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 351 bool isLWE() const { return isImmTy(ImmTyLWE); } 352 bool isOff() const { return isImmTy(ImmTyOff); } 353 bool isExpTgt() const { return 
isImmTy(ImmTyExpTgt); } 354 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 355 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 356 bool isOffen() const { return isImmTy(ImmTyOffen); } 357 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 358 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 359 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 360 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 361 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 362 363 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 364 bool isGDS() const { return isImmTy(ImmTyGDS); } 365 bool isLDS() const { return isImmTy(ImmTyLDS); } 366 bool isDLC() const { return isImmTy(ImmTyDLC); } 367 bool isGLC() const { return isImmTy(ImmTyGLC); } 368 // "GLC_1" is a MatchClass of the GLC_1 operand with the default and forced 369 // value of the GLC operand. 370 bool isGLC_1() const { return isImmTy(ImmTyGLC); } 371 bool isSLC() const { return isImmTy(ImmTySLC); } 372 bool isSWZ() const { return isImmTy(ImmTySWZ); } 373 bool isTFE() const { return isImmTy(ImmTyTFE); } 374 bool isD16() const { return isImmTy(ImmTyD16); } 375 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 376 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 377 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 378 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 379 bool isFI() const { return isImmTy(ImmTyDppFi); } 380 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 381 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 382 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 383 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 384 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 385 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 386 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 387 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 388 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 389 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 390 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 391 bool isHigh() const { return isImmTy(ImmTyHigh); } 392 393 bool isMod() const { 394 return isClampSI() || isOModSI(); 395 } 396 397 bool isRegOrImm() const { 398 return isReg() || isImm(); 399 } 400 401 bool isRegClass(unsigned RCID) const; 402 403 bool isInlineValue() const; 404 405 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 406 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 407 } 408 409 bool isSCSrcB16() const { 410 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 411 } 412 413 bool isSCSrcV2B16() const { 414 return isSCSrcB16(); 415 } 416 417 bool isSCSrcB32() const { 418 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 419 } 420 421 bool isSCSrcB64() const { 422 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 423 } 424 425 bool isBoolReg() const; 426 427 bool isSCSrcF16() const { 428 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 429 } 430 431 bool isSCSrcV2F16() const { 432 return isSCSrcF16(); 433 } 434 435 bool isSCSrcF32() const { 436 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 437 } 438 439 bool isSCSrcF64() const { 440 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 441 } 442 443 
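  // Note on the source-operand predicates that follow (summary inferred from the
  // checks themselves): the prefix encodes the allowed source kinds. "SCSrc"
  // accepts an SGPR or an inline constant; "SSrc" additionally accepts a literal
  // (and, for the 32-bit forms, an expression); "VCSrc" accepts a VGPR/SGPR or an
  // inline constant; "VSrc" additionally accepts a literal; "VISrc" accepts a VGPR
  // or an inline constant; and "AISrc" accepts an AGPR or an inline constant. The
  // B/F suffix gives the integer/floating-point interpretation and the bit width.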
bool isSSrcB32() const { 444 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 445 } 446 447 bool isSSrcB16() const { 448 return isSCSrcB16() || isLiteralImm(MVT::i16); 449 } 450 451 bool isSSrcV2B16() const { 452 llvm_unreachable("cannot happen"); 453 return isSSrcB16(); 454 } 455 456 bool isSSrcB64() const { 457 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 458 // See isVSrc64(). 459 return isSCSrcB64() || isLiteralImm(MVT::i64); 460 } 461 462 bool isSSrcF32() const { 463 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 464 } 465 466 bool isSSrcF64() const { 467 return isSCSrcB64() || isLiteralImm(MVT::f64); 468 } 469 470 bool isSSrcF16() const { 471 return isSCSrcB16() || isLiteralImm(MVT::f16); 472 } 473 474 bool isSSrcV2F16() const { 475 llvm_unreachable("cannot happen"); 476 return isSSrcF16(); 477 } 478 479 bool isSSrcOrLdsB32() const { 480 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 481 isLiteralImm(MVT::i32) || isExpr(); 482 } 483 484 bool isVCSrcB32() const { 485 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 486 } 487 488 bool isVCSrcB64() const { 489 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 490 } 491 492 bool isVCSrcB16() const { 493 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 494 } 495 496 bool isVCSrcV2B16() const { 497 return isVCSrcB16(); 498 } 499 500 bool isVCSrcF32() const { 501 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 502 } 503 504 bool isVCSrcF64() const { 505 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 506 } 507 508 bool isVCSrcF16() const { 509 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 510 } 511 512 bool isVCSrcV2F16() const { 513 return isVCSrcF16(); 514 } 515 516 bool isVSrcB32() const { 517 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 518 } 519 520 bool isVSrcB64() const { 521 return isVCSrcF64() || isLiteralImm(MVT::i64); 522 } 523 524 bool isVSrcB16() const { 525 return isVCSrcB16() || isLiteralImm(MVT::i16); 526 } 527 528 bool isVSrcV2B16() const { 529 return isVSrcB16() || isLiteralImm(MVT::v2i16); 530 } 531 532 bool isVSrcF32() const { 533 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 534 } 535 536 bool isVSrcF64() const { 537 return isVCSrcF64() || isLiteralImm(MVT::f64); 538 } 539 540 bool isVSrcF16() const { 541 return isVCSrcF16() || isLiteralImm(MVT::f16); 542 } 543 544 bool isVSrcV2F16() const { 545 return isVSrcF16() || isLiteralImm(MVT::v2f16); 546 } 547 548 bool isVISrcB32() const { 549 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 550 } 551 552 bool isVISrcB16() const { 553 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 554 } 555 556 bool isVISrcV2B16() const { 557 return isVISrcB16(); 558 } 559 560 bool isVISrcF32() const { 561 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 562 } 563 564 bool isVISrcF16() const { 565 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 566 } 567 568 bool isVISrcV2F16() const { 569 return isVISrcF16() || isVISrcB32(); 570 } 571 572 bool isAISrcB32() const { 573 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 574 } 575 576 bool isAISrcB16() const { 577 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 578 } 579 580 bool isAISrcV2B16() const { 581 return isAISrcB16(); 582 } 583 584 bool isAISrcF32() const { 585 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 586 } 587 588 bool 
isAISrcF16() const { 589 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 590 } 591 592 bool isAISrcV2F16() const { 593 return isAISrcF16() || isAISrcB32(); 594 } 595 596 bool isAISrc_128B32() const { 597 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 598 } 599 600 bool isAISrc_128B16() const { 601 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 602 } 603 604 bool isAISrc_128V2B16() const { 605 return isAISrc_128B16(); 606 } 607 608 bool isAISrc_128F32() const { 609 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 610 } 611 612 bool isAISrc_128F16() const { 613 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 614 } 615 616 bool isAISrc_128V2F16() const { 617 return isAISrc_128F16() || isAISrc_128B32(); 618 } 619 620 bool isAISrc_512B32() const { 621 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 622 } 623 624 bool isAISrc_512B16() const { 625 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 626 } 627 628 bool isAISrc_512V2B16() const { 629 return isAISrc_512B16(); 630 } 631 632 bool isAISrc_512F32() const { 633 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 634 } 635 636 bool isAISrc_512F16() const { 637 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 638 } 639 640 bool isAISrc_512V2F16() const { 641 return isAISrc_512F16() || isAISrc_512B32(); 642 } 643 644 bool isAISrc_1024B32() const { 645 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 646 } 647 648 bool isAISrc_1024B16() const { 649 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 650 } 651 652 bool isAISrc_1024V2B16() const { 653 return isAISrc_1024B16(); 654 } 655 656 bool isAISrc_1024F32() const { 657 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 658 } 659 660 bool isAISrc_1024F16() const { 661 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 662 } 663 664 bool isAISrc_1024V2F16() const { 665 return isAISrc_1024F16() || isAISrc_1024B32(); 666 } 667 668 bool isKImmFP32() const { 669 return isLiteralImm(MVT::f32); 670 } 671 672 bool isKImmFP16() const { 673 return isLiteralImm(MVT::f16); 674 } 675 676 bool isMem() const override { 677 return false; 678 } 679 680 bool isExpr() const { 681 return Kind == Expression; 682 } 683 684 bool isSoppBrTarget() const { 685 return isExpr() || isImm(); 686 } 687 688 bool isSWaitCnt() const; 689 bool isHwreg() const; 690 bool isSendMsg() const; 691 bool isSwizzle() const; 692 bool isSMRDOffset8() const; 693 bool isSMEMOffset() const; 694 bool isSMRDLiteralOffset() const; 695 bool isDPP8() const; 696 bool isDPPCtrl() const; 697 bool isBLGP() const; 698 bool isCBSZ() const; 699 bool isABID() const; 700 bool isGPRIdxMode() const; 701 bool isS16Imm() const; 702 bool isU16Imm() const; 703 bool isEndpgm() const; 704 705 StringRef getExpressionAsToken() const { 706 assert(isExpr()); 707 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 708 return S->getSymbol().getName(); 709 } 710 711 StringRef getToken() const { 712 assert(isToken()); 713 714 if (Kind == Expression) 715 return getExpressionAsToken(); 716 717 return StringRef(Tok.Data, Tok.Length); 718 } 719 720 int64_t getImm() const { 721 assert(isImm()); 722 return Imm.Val; 723 } 724 725 void setImm(int64_t Val) { 726 assert(isImm()); 727 Imm.Val = Val; 728 } 729 730 ImmTy getImmTy() const { 731 assert(isImm()); 732 return Imm.Type; 733 } 734 735 unsigned getReg() const override { 736 assert(isRegKind()); 737 
return Reg.RegNo; 738 } 739 740 SMLoc getStartLoc() const override { 741 return StartLoc; 742 } 743 744 SMLoc getEndLoc() const override { 745 return EndLoc; 746 } 747 748 SMRange getLocRange() const { 749 return SMRange(StartLoc, EndLoc); 750 } 751 752 Modifiers getModifiers() const { 753 assert(isRegKind() || isImmTy(ImmTyNone)); 754 return isRegKind() ? Reg.Mods : Imm.Mods; 755 } 756 757 void setModifiers(Modifiers Mods) { 758 assert(isRegKind() || isImmTy(ImmTyNone)); 759 if (isRegKind()) 760 Reg.Mods = Mods; 761 else 762 Imm.Mods = Mods; 763 } 764 765 bool hasModifiers() const { 766 return getModifiers().hasModifiers(); 767 } 768 769 bool hasFPModifiers() const { 770 return getModifiers().hasFPModifiers(); 771 } 772 773 bool hasIntModifiers() const { 774 return getModifiers().hasIntModifiers(); 775 } 776 777 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 778 779 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 780 781 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 782 783 template <unsigned Bitwidth> 784 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 785 786 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 787 addKImmFPOperands<16>(Inst, N); 788 } 789 790 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 791 addKImmFPOperands<32>(Inst, N); 792 } 793 794 void addRegOperands(MCInst &Inst, unsigned N) const; 795 796 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 797 addRegOperands(Inst, N); 798 } 799 800 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 801 if (isRegKind()) 802 addRegOperands(Inst, N); 803 else if (isExpr()) 804 Inst.addOperand(MCOperand::createExpr(Expr)); 805 else 806 addImmOperands(Inst, N); 807 } 808 809 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 810 Modifiers Mods = getModifiers(); 811 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 812 if (isRegKind()) { 813 addRegOperands(Inst, N); 814 } else { 815 addImmOperands(Inst, N, false); 816 } 817 } 818 819 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 820 assert(!hasIntModifiers()); 821 addRegOrImmWithInputModsOperands(Inst, N); 822 } 823 824 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 825 assert(!hasFPModifiers()); 826 addRegOrImmWithInputModsOperands(Inst, N); 827 } 828 829 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 830 Modifiers Mods = getModifiers(); 831 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 832 assert(isRegKind()); 833 addRegOperands(Inst, N); 834 } 835 836 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 837 assert(!hasIntModifiers()); 838 addRegWithInputModsOperands(Inst, N); 839 } 840 841 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 842 assert(!hasFPModifiers()); 843 addRegWithInputModsOperands(Inst, N); 844 } 845 846 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 847 if (isImm()) 848 addImmOperands(Inst, N); 849 else { 850 assert(isExpr()); 851 Inst.addOperand(MCOperand::createExpr(Expr)); 852 } 853 } 854 855 static void printImmTy(raw_ostream& OS, ImmTy Type) { 856 switch (Type) { 857 case ImmTyNone: OS << "None"; break; 858 case ImmTyGDS: OS << "GDS"; break; 859 case ImmTyLDS: OS << "LDS"; break; 860 case ImmTyOffen: OS << "Offen"; break; 861 case ImmTyIdxen: OS << "Idxen"; break; 862 case ImmTyAddr64: OS << "Addr64"; break; 863 case ImmTyOffset: OS << "Offset"; 
break; 864 case ImmTyInstOffset: OS << "InstOffset"; break; 865 case ImmTyOffset0: OS << "Offset0"; break; 866 case ImmTyOffset1: OS << "Offset1"; break; 867 case ImmTyDLC: OS << "DLC"; break; 868 case ImmTyGLC: OS << "GLC"; break; 869 case ImmTySLC: OS << "SLC"; break; 870 case ImmTySWZ: OS << "SWZ"; break; 871 case ImmTyTFE: OS << "TFE"; break; 872 case ImmTyD16: OS << "D16"; break; 873 case ImmTyFORMAT: OS << "FORMAT"; break; 874 case ImmTyClampSI: OS << "ClampSI"; break; 875 case ImmTyOModSI: OS << "OModSI"; break; 876 case ImmTyDPP8: OS << "DPP8"; break; 877 case ImmTyDppCtrl: OS << "DppCtrl"; break; 878 case ImmTyDppRowMask: OS << "DppRowMask"; break; 879 case ImmTyDppBankMask: OS << "DppBankMask"; break; 880 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 881 case ImmTyDppFi: OS << "FI"; break; 882 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 883 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 884 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 885 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 886 case ImmTyDMask: OS << "DMask"; break; 887 case ImmTyDim: OS << "Dim"; break; 888 case ImmTyUNorm: OS << "UNorm"; break; 889 case ImmTyDA: OS << "DA"; break; 890 case ImmTyR128A16: OS << "R128A16"; break; 891 case ImmTyA16: OS << "A16"; break; 892 case ImmTyLWE: OS << "LWE"; break; 893 case ImmTyOff: OS << "Off"; break; 894 case ImmTyExpTgt: OS << "ExpTgt"; break; 895 case ImmTyExpCompr: OS << "ExpCompr"; break; 896 case ImmTyExpVM: OS << "ExpVM"; break; 897 case ImmTyHwreg: OS << "Hwreg"; break; 898 case ImmTySendMsg: OS << "SendMsg"; break; 899 case ImmTyInterpSlot: OS << "InterpSlot"; break; 900 case ImmTyInterpAttr: OS << "InterpAttr"; break; 901 case ImmTyAttrChan: OS << "AttrChan"; break; 902 case ImmTyOpSel: OS << "OpSel"; break; 903 case ImmTyOpSelHi: OS << "OpSelHi"; break; 904 case ImmTyNegLo: OS << "NegLo"; break; 905 case ImmTyNegHi: OS << "NegHi"; break; 906 case ImmTySwizzle: OS << "Swizzle"; break; 907 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 908 case ImmTyHigh: OS << "High"; break; 909 case ImmTyBLGP: OS << "BLGP"; break; 910 case ImmTyCBSZ: OS << "CBSZ"; break; 911 case ImmTyABID: OS << "ABID"; break; 912 case ImmTyEndpgm: OS << "Endpgm"; break; 913 } 914 } 915 916 void print(raw_ostream &OS) const override { 917 switch (Kind) { 918 case Register: 919 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 920 break; 921 case Immediate: 922 OS << '<' << getImm(); 923 if (getImmTy() != ImmTyNone) { 924 OS << " type: "; printImmTy(OS, getImmTy()); 925 } 926 OS << " mods: " << Imm.Mods << '>'; 927 break; 928 case Token: 929 OS << '\'' << getToken() << '\''; 930 break; 931 case Expression: 932 OS << "<expr " << *Expr << '>'; 933 break; 934 } 935 } 936 937 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 938 int64_t Val, SMLoc Loc, 939 ImmTy Type = ImmTyNone, 940 bool IsFPImm = false) { 941 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 942 Op->Imm.Val = Val; 943 Op->Imm.IsFPImm = IsFPImm; 944 Op->Imm.Kind = ImmKindTyNone; 945 Op->Imm.Type = Type; 946 Op->Imm.Mods = Modifiers(); 947 Op->StartLoc = Loc; 948 Op->EndLoc = Loc; 949 return Op; 950 } 951 952 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 953 StringRef Str, SMLoc Loc, 954 bool HasExplicitEncodingSize = true) { 955 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 956 Res->Tok.Data = Str.data(); 957 Res->Tok.Length = Str.size(); 958 Res->StartLoc = Loc; 959 Res->EndLoc = Loc; 960 return Res; 961 } 962 963 
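  // CreateReg and CreateExpr below follow the same pattern as CreateImm and
  // CreateToken above: allocate the operand, fill in its payload, and record
  // the source location range. Register modifiers (abs/neg/sext) default to
  // none and may be attached later via setModifiers().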
static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 964 unsigned RegNo, SMLoc S, 965 SMLoc E) { 966 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 967 Op->Reg.RegNo = RegNo; 968 Op->Reg.Mods = Modifiers(); 969 Op->StartLoc = S; 970 Op->EndLoc = E; 971 return Op; 972 } 973 974 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 975 const class MCExpr *Expr, SMLoc S) { 976 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 977 Op->Expr = Expr; 978 Op->StartLoc = S; 979 Op->EndLoc = S; 980 return Op; 981 } 982 }; 983 984 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 985 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 986 return OS; 987 } 988 989 //===----------------------------------------------------------------------===// 990 // AsmParser 991 //===----------------------------------------------------------------------===// 992 993 // Holds info related to the current kernel, e.g. count of SGPRs used. 994 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 995 // .amdgpu_hsa_kernel or at EOF. 996 class KernelScopeInfo { 997 int SgprIndexUnusedMin = -1; 998 int VgprIndexUnusedMin = -1; 999 MCContext *Ctx = nullptr; 1000 1001 void usesSgprAt(int i) { 1002 if (i >= SgprIndexUnusedMin) { 1003 SgprIndexUnusedMin = ++i; 1004 if (Ctx) { 1005 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 1006 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 1007 } 1008 } 1009 } 1010 1011 void usesVgprAt(int i) { 1012 if (i >= VgprIndexUnusedMin) { 1013 VgprIndexUnusedMin = ++i; 1014 if (Ctx) { 1015 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 1016 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 1017 } 1018 } 1019 } 1020 1021 public: 1022 KernelScopeInfo() = default; 1023 1024 void initialize(MCContext &Context) { 1025 Ctx = &Context; 1026 usesSgprAt(SgprIndexUnusedMin = -1); 1027 usesVgprAt(VgprIndexUnusedMin = -1); 1028 } 1029 1030 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 1031 switch (RegKind) { 1032 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 1033 case IS_AGPR: // fall through 1034 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1035 default: break; 1036 } 1037 } 1038 }; 1039 1040 class AMDGPUAsmParser : public MCTargetAsmParser { 1041 MCAsmParser &Parser; 1042 1043 // Number of extra operands parsed after the first optional operand. 1044 // This may be necessary to skip hardcoded mandatory operands. 1045 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1046 1047 unsigned ForcedEncodingSize = 0; 1048 bool ForcedDPP = false; 1049 bool ForcedSDWA = false; 1050 KernelScopeInfo KernelScope; 1051 1052 /// @name Auto-generated Match Functions 1053 /// { 1054 1055 #define GET_ASSEMBLER_HEADER 1056 #include "AMDGPUGenAsmMatcher.inc" 1057 1058 /// } 1059 1060 private: 1061 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1062 bool OutOfRangeError(SMRange Range); 1063 /// Calculate VGPR/SGPR blocks required for given target, reserved 1064 /// registers, and user-specified NextFreeXGPR values. 1065 /// 1066 /// \param Features [in] Target features, used for bug corrections. 1067 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1068 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1069 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 
1070 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1071 /// descriptor field, if valid. 1072 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1073 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1074 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1075 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1076 /// \param VGPRBlocks [out] Result VGPR block count. 1077 /// \param SGPRBlocks [out] Result SGPR block count. 1078 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1079 bool FlatScrUsed, bool XNACKUsed, 1080 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1081 SMRange VGPRRange, unsigned NextFreeSGPR, 1082 SMRange SGPRRange, unsigned &VGPRBlocks, 1083 unsigned &SGPRBlocks); 1084 bool ParseDirectiveAMDGCNTarget(); 1085 bool ParseDirectiveAMDHSAKernel(); 1086 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1087 bool ParseDirectiveHSACodeObjectVersion(); 1088 bool ParseDirectiveHSACodeObjectISA(); 1089 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1090 bool ParseDirectiveAMDKernelCodeT(); 1091 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1092 bool ParseDirectiveAMDGPUHsaKernel(); 1093 1094 bool ParseDirectiveISAVersion(); 1095 bool ParseDirectiveHSAMetadata(); 1096 bool ParseDirectivePALMetadataBegin(); 1097 bool ParseDirectivePALMetadata(); 1098 bool ParseDirectiveAMDGPULDS(); 1099 1100 /// Common code to parse out a block of text (typically YAML) between start and 1101 /// end directives. 1102 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1103 const char *AssemblerDirectiveEnd, 1104 std::string &CollectString); 1105 1106 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1107 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1108 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1109 unsigned &RegNum, unsigned &RegWidth, 1110 bool RestoreOnFailure = false); 1111 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1112 unsigned &RegNum, unsigned &RegWidth, 1113 SmallVectorImpl<AsmToken> &Tokens); 1114 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1115 unsigned &RegWidth, 1116 SmallVectorImpl<AsmToken> &Tokens); 1117 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1118 unsigned &RegWidth, 1119 SmallVectorImpl<AsmToken> &Tokens); 1120 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1121 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1122 bool ParseRegRange(unsigned& Num, unsigned& Width); 1123 unsigned getRegularReg(RegisterKind RegKind, 1124 unsigned RegNum, 1125 unsigned RegWidth, 1126 SMLoc Loc); 1127 1128 bool isRegister(); 1129 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1130 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1131 void initializeGprCountSymbol(RegisterKind RegKind); 1132 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1133 unsigned RegWidth); 1134 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1135 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1136 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1137 bool IsGdsHardcoded); 1138 1139 public: 1140 enum AMDGPUMatchResultTy { 1141 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1142 }; 1143 enum OperandMode { 1144 OperandMode_Default, 1145 OperandMode_NSA, 1146 }; 1147 1148 using OptionalImmIndexMap = 
std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // There is currently no suitable machinery in core llvm-mc for this:
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && isHsaAbiVersion3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

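  // The accessors below expose MC-layer objects (target streamer, register and
  // instruction info, subtarget feature bits) and the "forced encoding" state,
  // which is typically set while parsing an explicit mnemonic suffix and is
  // consulted by the custom instruction matcher.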
AMDGPUTargetStreamer &getTargetStreamer() { 1260 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1261 return static_cast<AMDGPUTargetStreamer &>(TS); 1262 } 1263 1264 const MCRegisterInfo *getMRI() const { 1265 // We need this const_cast because for some reason getContext() is not const 1266 // in MCAsmParser. 1267 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1268 } 1269 1270 const MCInstrInfo *getMII() const { 1271 return &MII; 1272 } 1273 1274 const FeatureBitset &getFeatureBits() const { 1275 return getSTI().getFeatureBits(); 1276 } 1277 1278 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1279 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1280 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1281 1282 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1283 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1284 bool isForcedDPP() const { return ForcedDPP; } 1285 bool isForcedSDWA() const { return ForcedSDWA; } 1286 ArrayRef<unsigned> getMatchedVariants() const; 1287 StringRef getMatchedVariantName() const; 1288 1289 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1290 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1291 bool RestoreOnFailure); 1292 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1293 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1294 SMLoc &EndLoc) override; 1295 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1296 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1297 unsigned Kind) override; 1298 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1299 OperandVector &Operands, MCStreamer &Out, 1300 uint64_t &ErrorInfo, 1301 bool MatchingInlineAsm) override; 1302 bool ParseDirective(AsmToken DirectiveID) override; 1303 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1304 OperandMode Mode = OperandMode_Default); 1305 StringRef parseMnemonicSuffix(StringRef Name); 1306 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1307 SMLoc NameLoc, OperandVector &Operands) override; 1308 //bool ProcessInstruction(MCInst &Inst); 1309 1310 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1311 1312 OperandMatchResultTy 1313 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1314 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1315 bool (*ConvertResult)(int64_t &) = nullptr); 1316 1317 OperandMatchResultTy 1318 parseOperandArrayWithPrefix(const char *Prefix, 1319 OperandVector &Operands, 1320 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1321 bool (*ConvertResult)(int64_t&) = nullptr); 1322 1323 OperandMatchResultTy 1324 parseNamedBit(const char *Name, OperandVector &Operands, 1325 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1326 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1327 StringRef &Value); 1328 1329 bool isModifier(); 1330 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1331 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1332 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1333 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1334 bool parseSP3NegModifier(); 1335 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier 
= false); 1336 OperandMatchResultTy parseReg(OperandVector &Operands); 1337 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1338 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1339 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1340 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1341 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1342 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1343 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1344 OperandMatchResultTy parseUfmt(int64_t &Format); 1345 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1346 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1347 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1348 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1349 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1350 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1351 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1352 1353 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1354 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1355 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1356 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1357 1358 bool parseCnt(int64_t &IntVal); 1359 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1360 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1361 1362 private: 1363 struct OperandInfoTy { 1364 SMLoc Loc; 1365 int64_t Id; 1366 bool IsSymbolic = false; 1367 bool IsDefined = false; 1368 1369 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1370 }; 1371 1372 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1373 bool validateSendMsg(const OperandInfoTy &Msg, 1374 const OperandInfoTy &Op, 1375 const OperandInfoTy &Stream); 1376 1377 bool parseHwregBody(OperandInfoTy &HwReg, 1378 OperandInfoTy &Offset, 1379 OperandInfoTy &Width); 1380 bool validateHwreg(const OperandInfoTy &HwReg, 1381 const OperandInfoTy &Offset, 1382 const OperandInfoTy &Width); 1383 1384 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1385 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1386 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1387 1388 SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 1389 const OperandVector &Operands) const; 1390 SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const; 1391 SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const; 1392 SMLoc getLitLoc(const OperandVector &Operands) const; 1393 SMLoc getConstLoc(const OperandVector &Operands) const; 1394 1395 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1396 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1397 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1398 bool validateSOPLiteral(const MCInst &Inst) const; 1399 bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands); 1400 bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands); 1401 bool 
validateIntClampSupported(const MCInst &Inst); 1402 bool validateMIMGAtomicDMask(const MCInst &Inst); 1403 bool validateMIMGGatherDMask(const MCInst &Inst); 1404 bool validateMovrels(const MCInst &Inst, const OperandVector &Operands); 1405 bool validateMIMGDataSize(const MCInst &Inst); 1406 bool validateMIMGAddrSize(const MCInst &Inst); 1407 bool validateMIMGD16(const MCInst &Inst); 1408 bool validateMIMGDim(const MCInst &Inst); 1409 bool validateLdsDirect(const MCInst &Inst); 1410 bool validateOpSel(const MCInst &Inst); 1411 bool validateVccOperand(unsigned Reg) const; 1412 bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands); 1413 bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands); 1414 bool validateDivScale(const MCInst &Inst); 1415 bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands, 1416 const SMLoc &IDLoc); 1417 unsigned getConstantBusLimit(unsigned Opcode) const; 1418 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1419 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1420 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1421 1422 bool isSupportedMnemo(StringRef Mnemo, 1423 const FeatureBitset &FBS); 1424 bool isSupportedMnemo(StringRef Mnemo, 1425 const FeatureBitset &FBS, 1426 ArrayRef<unsigned> Variants); 1427 bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc); 1428 1429 bool isId(const StringRef Id) const; 1430 bool isId(const AsmToken &Token, const StringRef Id) const; 1431 bool isToken(const AsmToken::TokenKind Kind) const; 1432 bool trySkipId(const StringRef Id); 1433 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1434 bool trySkipToken(const AsmToken::TokenKind Kind); 1435 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1436 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1437 bool parseId(StringRef &Val, const StringRef ErrMsg); 1438 1439 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1440 AsmToken::TokenKind getTokenKind() const; 1441 bool parseExpr(int64_t &Imm, StringRef Expected = ""); 1442 bool parseExpr(OperandVector &Operands); 1443 StringRef getTokenStr() const; 1444 AsmToken peekToken(); 1445 AsmToken getToken() const; 1446 SMLoc getLoc() const; 1447 void lex(); 1448 1449 public: 1450 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1451 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1452 1453 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1454 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1455 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1456 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1457 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1458 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1459 1460 bool parseSwizzleOperand(int64_t &Op, 1461 const unsigned MinVal, 1462 const unsigned MaxVal, 1463 const StringRef ErrMsg, 1464 SMLoc &Loc); 1465 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1466 const unsigned MinVal, 1467 const unsigned MaxVal, 1468 const StringRef ErrMsg); 1469 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1470 bool parseSwizzleOffset(int64_t &Imm); 1471 bool parseSwizzleMacro(int64_t &Imm); 1472 bool parseSwizzleQuadPerm(int64_t &Imm); 1473 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1474 bool parseSwizzleBroadcast(int64_t &Imm); 1475 bool parseSwizzleSwap(int64_t &Imm); 1476 bool 
parseSwizzleReverse(int64_t &Imm); 1477 1478 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1479 int64_t parseGPRIdxMacro(); 1480 1481 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1482 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1483 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1484 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1485 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1486 1487 AMDGPUOperand::Ptr defaultDLC() const; 1488 AMDGPUOperand::Ptr defaultGLC() const; 1489 AMDGPUOperand::Ptr defaultGLC_1() const; 1490 AMDGPUOperand::Ptr defaultSLC() const; 1491 1492 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1493 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1494 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1495 AMDGPUOperand::Ptr defaultFlatOffset() const; 1496 1497 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1498 1499 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1500 OptionalImmIndexMap &OptionalIdx); 1501 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1502 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1503 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1504 1505 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1506 1507 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1508 bool IsAtomic = false); 1509 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1510 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1511 1512 OperandMatchResultTy parseDim(OperandVector &Operands); 1513 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1514 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1515 AMDGPUOperand::Ptr defaultRowMask() const; 1516 AMDGPUOperand::Ptr defaultBankMask() const; 1517 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1518 AMDGPUOperand::Ptr defaultFI() const; 1519 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1520 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1521 1522 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1523 AMDGPUOperand::ImmTy Type); 1524 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1525 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1526 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1527 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1528 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1529 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1530 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1531 uint64_t BasicInstType, 1532 bool SkipDstVcc = false, 1533 bool SkipSrcVcc = false); 1534 1535 AMDGPUOperand::Ptr defaultBLGP() const; 1536 AMDGPUOperand::Ptr defaultCBSZ() const; 1537 AMDGPUOperand::Ptr defaultABID() const; 1538 1539 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1540 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1541 }; 1542 1543 struct OptionalOperand { 1544 const char *Name; 1545 AMDGPUOperand::ImmTy Type; 1546 bool IsBit; 1547 bool (*ConvertResult)(int64_t&); 1548 }; 1549 1550 } // end anonymous namespace 1551 1552 // May be called with integer type with 
equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the floating-point semantics of the requested type.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
  if (VT.getScalarType() == MVT::i16) {
    // FP inline immediate values are broken for i16 operands; only integer
    // inline constants are accepted here.
    return isInlinableIntLiteral(Val);
  }

  // f16/v2f16 operands work correctly for all values.
  return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to integer literals while preserving
      // the same semantics for VOP1/2/C and VOP3 because of integer
      // truncation. To avoid ambiguity, reject these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // The low 32 bits of the literal are set to zero when it is encoded,
    // but such literals are accepted.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)?
MVT::i16 : type; 1743 1744 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1745 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1746 } 1747 1748 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1749 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1750 } 1751 1752 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1753 if (AsmParser->isVI()) 1754 return isVReg32(); 1755 else if (AsmParser->isGFX9Plus()) 1756 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1757 else 1758 return false; 1759 } 1760 1761 bool AMDGPUOperand::isSDWAFP16Operand() const { 1762 return isSDWAOperand(MVT::f16); 1763 } 1764 1765 bool AMDGPUOperand::isSDWAFP32Operand() const { 1766 return isSDWAOperand(MVT::f32); 1767 } 1768 1769 bool AMDGPUOperand::isSDWAInt16Operand() const { 1770 return isSDWAOperand(MVT::i16); 1771 } 1772 1773 bool AMDGPUOperand::isSDWAInt32Operand() const { 1774 return isSDWAOperand(MVT::i32); 1775 } 1776 1777 bool AMDGPUOperand::isBoolReg() const { 1778 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1779 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1780 } 1781 1782 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1783 { 1784 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1785 assert(Size == 2 || Size == 4 || Size == 8); 1786 1787 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1788 1789 if (Imm.Mods.Abs) { 1790 Val &= ~FpSignMask; 1791 } 1792 if (Imm.Mods.Neg) { 1793 Val ^= FpSignMask; 1794 } 1795 1796 return Val; 1797 } 1798 1799 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1800 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1801 Inst.getNumOperands())) { 1802 addLiteralImmOperand(Inst, Imm.Val, 1803 ApplyModifiers & 1804 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1805 } else { 1806 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1807 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1808 setImmKindNone(); 1809 } 1810 } 1811 1812 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1813 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1814 auto OpNum = Inst.getNumOperands(); 1815 // Check that this operand accepts literals 1816 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1817 1818 if (ApplyModifiers) { 1819 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1820 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1821 Val = applyInputFPModifiers(Val, Size); 1822 } 1823 1824 APInt Literal(64, Val); 1825 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1826 1827 if (Imm.IsFPImm) { // We got fp literal token 1828 switch (OpTy) { 1829 case AMDGPU::OPERAND_REG_IMM_INT64: 1830 case AMDGPU::OPERAND_REG_IMM_FP64: 1831 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1832 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1833 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1834 AsmParser->hasInv2PiInlineImm())) { 1835 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1836 setImmKindConst(); 1837 return; 1838 } 1839 1840 // Non-inlineable 1841 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1842 // For fp operands we check if low 32 bits are zeros 1843 if (Literal.getLoBits(32) != 0) { 1844 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1845 "Can't encode literal as exact 64-bit floating-point operand. " 1846 "Low 32-bits will be set to zero"); 1847 } 1848 1849 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1850 setImmKindLiteral(); 1851 return; 1852 } 1853 1854 // We don't allow fp literals in 64-bit integer instructions. It is 1855 // unclear how we should encode them. This case should be checked earlier 1856 // in predicate methods (isLiteralImm()) 1857 llvm_unreachable("fp literal in 64-bit integer instruction."); 1858 1859 case AMDGPU::OPERAND_REG_IMM_INT32: 1860 case AMDGPU::OPERAND_REG_IMM_FP32: 1861 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1862 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1863 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1864 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1865 case AMDGPU::OPERAND_REG_IMM_INT16: 1866 case AMDGPU::OPERAND_REG_IMM_FP16: 1867 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1868 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1869 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1870 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1871 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1872 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1873 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1874 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1875 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1876 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1877 bool lost; 1878 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1879 // Convert literal to single precision 1880 FPLiteral.convert(*getOpFltSemantics(OpTy), 1881 APFloat::rmNearestTiesToEven, &lost); 1882 // We allow precision lost but not overflow or underflow. This should be 1883 // checked earlier in isLiteralImm() 1884 1885 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1886 Inst.addOperand(MCOperand::createImm(ImmVal)); 1887 setImmKindLiteral(); 1888 return; 1889 } 1890 default: 1891 llvm_unreachable("invalid operand size"); 1892 } 1893 1894 return; 1895 } 1896 1897 // We got int literal token. 1898 // Only sign extend inline immediates. 
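// For example (illustrative): with a 32-bit operand, an integer token such as
// -1 matches an inline constant and is emitted as-is, whereas a value like
// 0x12345678 is not inlinable and is emitted as a 32-bit literal below.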
1899 switch (OpTy) { 1900 case AMDGPU::OPERAND_REG_IMM_INT32: 1901 case AMDGPU::OPERAND_REG_IMM_FP32: 1902 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1903 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1904 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1905 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1906 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1907 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1908 if (isSafeTruncation(Val, 32) && 1909 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1910 AsmParser->hasInv2PiInlineImm())) { 1911 Inst.addOperand(MCOperand::createImm(Val)); 1912 setImmKindConst(); 1913 return; 1914 } 1915 1916 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1917 setImmKindLiteral(); 1918 return; 1919 1920 case AMDGPU::OPERAND_REG_IMM_INT64: 1921 case AMDGPU::OPERAND_REG_IMM_FP64: 1922 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1923 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1924 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1925 Inst.addOperand(MCOperand::createImm(Val)); 1926 setImmKindConst(); 1927 return; 1928 } 1929 1930 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1931 setImmKindLiteral(); 1932 return; 1933 1934 case AMDGPU::OPERAND_REG_IMM_INT16: 1935 case AMDGPU::OPERAND_REG_IMM_FP16: 1936 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1937 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1938 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1939 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1940 if (isSafeTruncation(Val, 16) && 1941 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1942 AsmParser->hasInv2PiInlineImm())) { 1943 Inst.addOperand(MCOperand::createImm(Val)); 1944 setImmKindConst(); 1945 return; 1946 } 1947 1948 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1949 setImmKindLiteral(); 1950 return; 1951 1952 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1953 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1954 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1955 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1956 assert(isSafeTruncation(Val, 16)); 1957 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1958 AsmParser->hasInv2PiInlineImm())); 1959 1960 Inst.addOperand(MCOperand::createImm(Val)); 1961 return; 1962 } 1963 default: 1964 llvm_unreachable("invalid operand size"); 1965 } 1966 } 1967 1968 template <unsigned Bitwidth> 1969 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1970 APInt Literal(64, Imm.Val); 1971 setImmKindNone(); 1972 1973 if (!Imm.IsFPImm) { 1974 // We got int literal token. 
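// The integer token is truncated to the low Bitwidth bits; e.g. for a 16-bit
// K-operand only the low 16 bits of the parsed value are encoded.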
1975 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1976 return; 1977 } 1978 1979 bool Lost; 1980 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1981 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1982 APFloat::rmNearestTiesToEven, &Lost); 1983 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1984 } 1985 1986 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1987 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1988 } 1989 1990 static bool isInlineValue(unsigned Reg) { 1991 switch (Reg) { 1992 case AMDGPU::SRC_SHARED_BASE: 1993 case AMDGPU::SRC_SHARED_LIMIT: 1994 case AMDGPU::SRC_PRIVATE_BASE: 1995 case AMDGPU::SRC_PRIVATE_LIMIT: 1996 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1997 return true; 1998 case AMDGPU::SRC_VCCZ: 1999 case AMDGPU::SRC_EXECZ: 2000 case AMDGPU::SRC_SCC: 2001 return true; 2002 case AMDGPU::SGPR_NULL: 2003 return true; 2004 default: 2005 return false; 2006 } 2007 } 2008 2009 bool AMDGPUOperand::isInlineValue() const { 2010 return isRegKind() && ::isInlineValue(getReg()); 2011 } 2012 2013 //===----------------------------------------------------------------------===// 2014 // AsmParser 2015 //===----------------------------------------------------------------------===// 2016 2017 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 2018 if (Is == IS_VGPR) { 2019 switch (RegWidth) { 2020 default: return -1; 2021 case 1: return AMDGPU::VGPR_32RegClassID; 2022 case 2: return AMDGPU::VReg_64RegClassID; 2023 case 3: return AMDGPU::VReg_96RegClassID; 2024 case 4: return AMDGPU::VReg_128RegClassID; 2025 case 5: return AMDGPU::VReg_160RegClassID; 2026 case 6: return AMDGPU::VReg_192RegClassID; 2027 case 8: return AMDGPU::VReg_256RegClassID; 2028 case 16: return AMDGPU::VReg_512RegClassID; 2029 case 32: return AMDGPU::VReg_1024RegClassID; 2030 } 2031 } else if (Is == IS_TTMP) { 2032 switch (RegWidth) { 2033 default: return -1; 2034 case 1: return AMDGPU::TTMP_32RegClassID; 2035 case 2: return AMDGPU::TTMP_64RegClassID; 2036 case 4: return AMDGPU::TTMP_128RegClassID; 2037 case 8: return AMDGPU::TTMP_256RegClassID; 2038 case 16: return AMDGPU::TTMP_512RegClassID; 2039 } 2040 } else if (Is == IS_SGPR) { 2041 switch (RegWidth) { 2042 default: return -1; 2043 case 1: return AMDGPU::SGPR_32RegClassID; 2044 case 2: return AMDGPU::SGPR_64RegClassID; 2045 case 3: return AMDGPU::SGPR_96RegClassID; 2046 case 4: return AMDGPU::SGPR_128RegClassID; 2047 case 5: return AMDGPU::SGPR_160RegClassID; 2048 case 6: return AMDGPU::SGPR_192RegClassID; 2049 case 8: return AMDGPU::SGPR_256RegClassID; 2050 case 16: return AMDGPU::SGPR_512RegClassID; 2051 } 2052 } else if (Is == IS_AGPR) { 2053 switch (RegWidth) { 2054 default: return -1; 2055 case 1: return AMDGPU::AGPR_32RegClassID; 2056 case 2: return AMDGPU::AReg_64RegClassID; 2057 case 3: return AMDGPU::AReg_96RegClassID; 2058 case 4: return AMDGPU::AReg_128RegClassID; 2059 case 5: return AMDGPU::AReg_160RegClassID; 2060 case 6: return AMDGPU::AReg_192RegClassID; 2061 case 8: return AMDGPU::AReg_256RegClassID; 2062 case 16: return AMDGPU::AReg_512RegClassID; 2063 case 32: return AMDGPU::AReg_1024RegClassID; 2064 } 2065 } 2066 return -1; 2067 } 2068 2069 static unsigned getSpecialRegForName(StringRef RegName) { 2070 return StringSwitch<unsigned>(RegName) 2071 .Case("exec", AMDGPU::EXEC) 2072 .Case("vcc", AMDGPU::VCC) 2073 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2074 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2075 
.Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2076 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2077 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2078 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2079 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2080 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2081 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2082 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2083 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2084 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2085 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2086 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2087 .Case("m0", AMDGPU::M0) 2088 .Case("vccz", AMDGPU::SRC_VCCZ) 2089 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2090 .Case("execz", AMDGPU::SRC_EXECZ) 2091 .Case("src_execz", AMDGPU::SRC_EXECZ) 2092 .Case("scc", AMDGPU::SRC_SCC) 2093 .Case("src_scc", AMDGPU::SRC_SCC) 2094 .Case("tba", AMDGPU::TBA) 2095 .Case("tma", AMDGPU::TMA) 2096 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2097 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2098 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2099 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2100 .Case("vcc_lo", AMDGPU::VCC_LO) 2101 .Case("vcc_hi", AMDGPU::VCC_HI) 2102 .Case("exec_lo", AMDGPU::EXEC_LO) 2103 .Case("exec_hi", AMDGPU::EXEC_HI) 2104 .Case("tma_lo", AMDGPU::TMA_LO) 2105 .Case("tma_hi", AMDGPU::TMA_HI) 2106 .Case("tba_lo", AMDGPU::TBA_LO) 2107 .Case("tba_hi", AMDGPU::TBA_HI) 2108 .Case("pc", AMDGPU::PC_REG) 2109 .Case("null", AMDGPU::SGPR_NULL) 2110 .Default(AMDGPU::NoRegister); 2111 } 2112 2113 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2114 SMLoc &EndLoc, bool RestoreOnFailure) { 2115 auto R = parseRegister(); 2116 if (!R) return true; 2117 assert(R->isReg()); 2118 RegNo = R->getReg(); 2119 StartLoc = R->getStartLoc(); 2120 EndLoc = R->getEndLoc(); 2121 return false; 2122 } 2123 2124 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2125 SMLoc &EndLoc) { 2126 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2127 } 2128 2129 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2130 SMLoc &StartLoc, 2131 SMLoc &EndLoc) { 2132 bool Result = 2133 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2134 bool PendingErrors = getParser().hasPendingError(); 2135 getParser().clearPendingErrors(); 2136 if (PendingErrors) 2137 return MatchOperand_ParseFail; 2138 if (Result) 2139 return MatchOperand_NoMatch; 2140 return MatchOperand_Success; 2141 } 2142 2143 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2144 RegisterKind RegKind, unsigned Reg1, 2145 SMLoc Loc) { 2146 switch (RegKind) { 2147 case IS_SPECIAL: 2148 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2149 Reg = AMDGPU::EXEC; 2150 RegWidth = 2; 2151 return true; 2152 } 2153 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2154 Reg = AMDGPU::FLAT_SCR; 2155 RegWidth = 2; 2156 return true; 2157 } 2158 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2159 Reg = AMDGPU::XNACK_MASK; 2160 RegWidth = 2; 2161 return true; 2162 } 2163 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2164 Reg = AMDGPU::VCC; 2165 RegWidth = 2; 2166 return true; 2167 } 2168 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2169 Reg = AMDGPU::TBA; 2170 RegWidth = 2; 2171 return true; 2172 } 2173 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2174 Reg = AMDGPU::TMA; 2175 
RegWidth = 2; 2176 return true; 2177 } 2178 Error(Loc, "register does not fit in the list"); 2179 return false; 2180 case IS_VGPR: 2181 case IS_SGPR: 2182 case IS_AGPR: 2183 case IS_TTMP: 2184 if (Reg1 != Reg + RegWidth) { 2185 Error(Loc, "registers in a list must have consecutive indices"); 2186 return false; 2187 } 2188 RegWidth++; 2189 return true; 2190 default: 2191 llvm_unreachable("unexpected register kind"); 2192 } 2193 } 2194 2195 struct RegInfo { 2196 StringLiteral Name; 2197 RegisterKind Kind; 2198 }; 2199 2200 static constexpr RegInfo RegularRegisters[] = { 2201 {{"v"}, IS_VGPR}, 2202 {{"s"}, IS_SGPR}, 2203 {{"ttmp"}, IS_TTMP}, 2204 {{"acc"}, IS_AGPR}, 2205 {{"a"}, IS_AGPR}, 2206 }; 2207 2208 static bool isRegularReg(RegisterKind Kind) { 2209 return Kind == IS_VGPR || 2210 Kind == IS_SGPR || 2211 Kind == IS_TTMP || 2212 Kind == IS_AGPR; 2213 } 2214 2215 static const RegInfo* getRegularRegInfo(StringRef Str) { 2216 for (const RegInfo &Reg : RegularRegisters) 2217 if (Str.startswith(Reg.Name)) 2218 return &Reg; 2219 return nullptr; 2220 } 2221 2222 static bool getRegNum(StringRef Str, unsigned& Num) { 2223 return !Str.getAsInteger(10, Num); 2224 } 2225 2226 bool 2227 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2228 const AsmToken &NextToken) const { 2229 2230 // A list of consecutive registers: [s0,s1,s2,s3] 2231 if (Token.is(AsmToken::LBrac)) 2232 return true; 2233 2234 if (!Token.is(AsmToken::Identifier)) 2235 return false; 2236 2237 // A single register like s0 or a range of registers like s[0:1] 2238 2239 StringRef Str = Token.getString(); 2240 const RegInfo *Reg = getRegularRegInfo(Str); 2241 if (Reg) { 2242 StringRef RegName = Reg->Name; 2243 StringRef RegSuffix = Str.substr(RegName.size()); 2244 if (!RegSuffix.empty()) { 2245 unsigned Num; 2246 // A single register with an index: rXX 2247 if (getRegNum(RegSuffix, Num)) 2248 return true; 2249 } else { 2250 // A range of registers: r[XX:YY]. 2251 if (NextToken.is(AsmToken::LBrac)) 2252 return true; 2253 } 2254 } 2255 2256 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2257 } 2258 2259 bool 2260 AMDGPUAsmParser::isRegister() 2261 { 2262 return isRegister(getToken(), peekToken()); 2263 } 2264 2265 unsigned 2266 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2267 unsigned RegNum, 2268 unsigned RegWidth, 2269 SMLoc Loc) { 2270 2271 assert(isRegularReg(RegKind)); 2272 2273 unsigned AlignSize = 1; 2274 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2275 // SGPR and TTMP registers must be aligned. 2276 // Max required alignment is 4 dwords. 
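// For example, s[2:3] satisfies the alignment check below, while s[1:2]
// is rejected with "invalid register alignment".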
2277 AlignSize = std::min(RegWidth, 4u); 2278 } 2279 2280 if (RegNum % AlignSize != 0) { 2281 Error(Loc, "invalid register alignment"); 2282 return AMDGPU::NoRegister; 2283 } 2284 2285 unsigned RegIdx = RegNum / AlignSize; 2286 int RCID = getRegClass(RegKind, RegWidth); 2287 if (RCID == -1) { 2288 Error(Loc, "invalid or unsupported register size"); 2289 return AMDGPU::NoRegister; 2290 } 2291 2292 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2293 const MCRegisterClass RC = TRI->getRegClass(RCID); 2294 if (RegIdx >= RC.getNumRegs()) { 2295 Error(Loc, "register index is out of range"); 2296 return AMDGPU::NoRegister; 2297 } 2298 2299 return RC.getRegister(RegIdx); 2300 } 2301 2302 bool 2303 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2304 int64_t RegLo, RegHi; 2305 if (!skipToken(AsmToken::LBrac, "missing register index")) 2306 return false; 2307 2308 SMLoc FirstIdxLoc = getLoc(); 2309 SMLoc SecondIdxLoc; 2310 2311 if (!parseExpr(RegLo)) 2312 return false; 2313 2314 if (trySkipToken(AsmToken::Colon)) { 2315 SecondIdxLoc = getLoc(); 2316 if (!parseExpr(RegHi)) 2317 return false; 2318 } else { 2319 RegHi = RegLo; 2320 } 2321 2322 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2323 return false; 2324 2325 if (!isUInt<32>(RegLo)) { 2326 Error(FirstIdxLoc, "invalid register index"); 2327 return false; 2328 } 2329 2330 if (!isUInt<32>(RegHi)) { 2331 Error(SecondIdxLoc, "invalid register index"); 2332 return false; 2333 } 2334 2335 if (RegLo > RegHi) { 2336 Error(FirstIdxLoc, "first register index should not exceed second index"); 2337 return false; 2338 } 2339 2340 Num = static_cast<unsigned>(RegLo); 2341 Width = (RegHi - RegLo) + 1; 2342 return true; 2343 } 2344 2345 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2346 unsigned &RegNum, unsigned &RegWidth, 2347 SmallVectorImpl<AsmToken> &Tokens) { 2348 assert(isToken(AsmToken::Identifier)); 2349 unsigned Reg = getSpecialRegForName(getTokenStr()); 2350 if (Reg) { 2351 RegNum = 0; 2352 RegWidth = 1; 2353 RegKind = IS_SPECIAL; 2354 Tokens.push_back(getToken()); 2355 lex(); // skip register name 2356 } 2357 return Reg; 2358 } 2359 2360 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2361 unsigned &RegNum, unsigned &RegWidth, 2362 SmallVectorImpl<AsmToken> &Tokens) { 2363 assert(isToken(AsmToken::Identifier)); 2364 StringRef RegName = getTokenStr(); 2365 auto Loc = getLoc(); 2366 2367 const RegInfo *RI = getRegularRegInfo(RegName); 2368 if (!RI) { 2369 Error(Loc, "invalid register name"); 2370 return AMDGPU::NoRegister; 2371 } 2372 2373 Tokens.push_back(getToken()); 2374 lex(); // skip register name 2375 2376 RegKind = RI->Kind; 2377 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2378 if (!RegSuffix.empty()) { 2379 // Single 32-bit register: vXX. 2380 if (!getRegNum(RegSuffix, RegNum)) { 2381 Error(Loc, "invalid register index"); 2382 return AMDGPU::NoRegister; 2383 } 2384 RegWidth = 1; 2385 } else { 2386 // Range of registers: v[XX:YY]. ":YY" is optional. 
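// e.g. v[8:11] denotes a 128-bit register tuple, while v[4] denotes a
// single 32-bit register (the ":YY" part is omitted).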
2387 if (!ParseRegRange(RegNum, RegWidth)) 2388 return AMDGPU::NoRegister; 2389 } 2390 2391 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2392 } 2393 2394 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2395 unsigned &RegWidth, 2396 SmallVectorImpl<AsmToken> &Tokens) { 2397 unsigned Reg = AMDGPU::NoRegister; 2398 auto ListLoc = getLoc(); 2399 2400 if (!skipToken(AsmToken::LBrac, 2401 "expected a register or a list of registers")) { 2402 return AMDGPU::NoRegister; 2403 } 2404 2405 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2406 2407 auto Loc = getLoc(); 2408 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2409 return AMDGPU::NoRegister; 2410 if (RegWidth != 1) { 2411 Error(Loc, "expected a single 32-bit register"); 2412 return AMDGPU::NoRegister; 2413 } 2414 2415 for (; trySkipToken(AsmToken::Comma); ) { 2416 RegisterKind NextRegKind; 2417 unsigned NextReg, NextRegNum, NextRegWidth; 2418 Loc = getLoc(); 2419 2420 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2421 NextRegNum, NextRegWidth, 2422 Tokens)) { 2423 return AMDGPU::NoRegister; 2424 } 2425 if (NextRegWidth != 1) { 2426 Error(Loc, "expected a single 32-bit register"); 2427 return AMDGPU::NoRegister; 2428 } 2429 if (NextRegKind != RegKind) { 2430 Error(Loc, "registers in a list must be of the same kind"); 2431 return AMDGPU::NoRegister; 2432 } 2433 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2434 return AMDGPU::NoRegister; 2435 } 2436 2437 if (!skipToken(AsmToken::RBrac, 2438 "expected a comma or a closing square bracket")) { 2439 return AMDGPU::NoRegister; 2440 } 2441 2442 if (isRegularReg(RegKind)) 2443 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2444 2445 return Reg; 2446 } 2447 2448 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2449 unsigned &RegNum, unsigned &RegWidth, 2450 SmallVectorImpl<AsmToken> &Tokens) { 2451 auto Loc = getLoc(); 2452 Reg = AMDGPU::NoRegister; 2453 2454 if (isToken(AsmToken::Identifier)) { 2455 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2456 if (Reg == AMDGPU::NoRegister) 2457 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2458 } else { 2459 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2460 } 2461 2462 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2463 if (Reg == AMDGPU::NoRegister) { 2464 assert(Parser.hasPendingError()); 2465 return false; 2466 } 2467 2468 if (!subtargetHasRegister(*TRI, Reg)) { 2469 if (Reg == AMDGPU::SGPR_NULL) { 2470 Error(Loc, "'null' operand is not supported on this GPU"); 2471 } else { 2472 Error(Loc, "register not available on this GPU"); 2473 } 2474 return false; 2475 } 2476 2477 return true; 2478 } 2479 2480 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2481 unsigned &RegNum, unsigned &RegWidth, 2482 bool RestoreOnFailure /*=false*/) { 2483 Reg = AMDGPU::NoRegister; 2484 2485 SmallVector<AsmToken, 1> Tokens; 2486 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2487 if (RestoreOnFailure) { 2488 while (!Tokens.empty()) { 2489 getLexer().UnLex(Tokens.pop_back_val()); 2490 } 2491 } 2492 return true; 2493 } 2494 return false; 2495 } 2496 2497 Optional<StringRef> 2498 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2499 switch (RegKind) { 2500 case IS_VGPR: 2501 return StringRef(".amdgcn.next_free_vgpr"); 2502 case IS_SGPR: 2503 return StringRef(".amdgcn.next_free_sgpr"); 2504 default: 2505 return None; 2506 } 2507 } 2508 2509 void 
AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2510 auto SymbolName = getGprCountSymbolName(RegKind); 2511 assert(SymbolName && "initializing invalid register kind"); 2512 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2513 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2514 } 2515 2516 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2517 unsigned DwordRegIndex, 2518 unsigned RegWidth) { 2519 // Symbols are only defined for GCN targets 2520 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2521 return true; 2522 2523 auto SymbolName = getGprCountSymbolName(RegKind); 2524 if (!SymbolName) 2525 return true; 2526 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2527 2528 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2529 int64_t OldCount; 2530 2531 if (!Sym->isVariable()) 2532 return !Error(getParser().getTok().getLoc(), 2533 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2534 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2535 return !Error( 2536 getParser().getTok().getLoc(), 2537 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2538 2539 if (OldCount <= NewMax) 2540 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2541 2542 return true; 2543 } 2544 2545 std::unique_ptr<AMDGPUOperand> 2546 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2547 const auto &Tok = Parser.getTok(); 2548 SMLoc StartLoc = Tok.getLoc(); 2549 SMLoc EndLoc = Tok.getEndLoc(); 2550 RegisterKind RegKind; 2551 unsigned Reg, RegNum, RegWidth; 2552 2553 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2554 return nullptr; 2555 } 2556 if (isHsaAbiVersion3(&getSTI())) { 2557 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2558 return nullptr; 2559 } else 2560 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2561 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2562 } 2563 2564 OperandMatchResultTy 2565 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2566 // TODO: add syntactic sugar for 1/(2*PI) 2567 2568 assert(!isRegister()); 2569 assert(!isModifier()); 2570 2571 const auto& Tok = getToken(); 2572 const auto& NextTok = peekToken(); 2573 bool IsReal = Tok.is(AsmToken::Real); 2574 SMLoc S = getLoc(); 2575 bool Negate = false; 2576 2577 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2578 lex(); 2579 IsReal = true; 2580 Negate = true; 2581 } 2582 2583 if (IsReal) { 2584 // Floating-point expressions are not supported. 2585 // Can only allow floating-point literals with an 2586 // optional sign. 2587 2588 StringRef Num = getTokenStr(); 2589 lex(); 2590 2591 APFloat RealVal(APFloat::IEEEdouble()); 2592 auto roundMode = APFloat::rmNearestTiesToEven; 2593 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2594 return MatchOperand_ParseFail; 2595 } 2596 if (Negate) 2597 RealVal.changeSign(); 2598 2599 Operands.push_back( 2600 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2601 AMDGPUOperand::ImmTyNone, true)); 2602 2603 return MatchOperand_Success; 2604 2605 } else { 2606 int64_t IntVal; 2607 const MCExpr *Expr; 2608 SMLoc S = getLoc(); 2609 2610 if (HasSP3AbsModifier) { 2611 // This is a workaround for handling expressions 2612 // as arguments of SP3 'abs' modifier, for example: 2613 // |1.0| 2614 // |-1| 2615 // |1+x| 2616 // This syntax is not compatible with syntax of standard 2617 // MC expressions (due to the trailing '|'). 
2618 SMLoc EndLoc; 2619 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2620 return MatchOperand_ParseFail; 2621 } else { 2622 if (Parser.parseExpression(Expr)) 2623 return MatchOperand_ParseFail; 2624 } 2625 2626 if (Expr->evaluateAsAbsolute(IntVal)) { 2627 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2628 } else { 2629 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2630 } 2631 2632 return MatchOperand_Success; 2633 } 2634 2635 return MatchOperand_NoMatch; 2636 } 2637 2638 OperandMatchResultTy 2639 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2640 if (!isRegister()) 2641 return MatchOperand_NoMatch; 2642 2643 if (auto R = parseRegister()) { 2644 assert(R->isReg()); 2645 Operands.push_back(std::move(R)); 2646 return MatchOperand_Success; 2647 } 2648 return MatchOperand_ParseFail; 2649 } 2650 2651 OperandMatchResultTy 2652 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2653 auto res = parseReg(Operands); 2654 if (res != MatchOperand_NoMatch) { 2655 return res; 2656 } else if (isModifier()) { 2657 return MatchOperand_NoMatch; 2658 } else { 2659 return parseImm(Operands, HasSP3AbsMod); 2660 } 2661 } 2662 2663 bool 2664 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2665 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2666 const auto &str = Token.getString(); 2667 return str == "abs" || str == "neg" || str == "sext"; 2668 } 2669 return false; 2670 } 2671 2672 bool 2673 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2674 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2675 } 2676 2677 bool 2678 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2679 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2680 } 2681 2682 bool 2683 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2684 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2685 } 2686 2687 // Check if this is an operand modifier or an opcode modifier 2688 // which may look like an expression but it is not. We should 2689 // avoid parsing these modifiers as expressions. Currently 2690 // recognized sequences are: 2691 // |...| 2692 // abs(...) 2693 // neg(...) 2694 // sext(...) 2695 // -reg 2696 // -|...| 2697 // -abs(...) 2698 // name:... 2699 // Note that simple opcode modifiers like 'gds' may be parsed as 2700 // expressions; this is a special case. See getExpressionAsToken. 2701 // 2702 bool 2703 AMDGPUAsmParser::isModifier() { 2704 2705 AsmToken Tok = getToken(); 2706 AsmToken NextToken[2]; 2707 peekTokens(NextToken); 2708 2709 return isOperandModifier(Tok, NextToken[0]) || 2710 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2711 isOpcodeModifierWithVal(Tok, NextToken[0]); 2712 } 2713 2714 // Check if the current token is an SP3 'neg' modifier. 2715 // Currently this modifier is allowed in the following context: 2716 // 2717 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2718 // 2. Before an 'abs' modifier: -abs(...) 2719 // 3. Before an SP3 'abs' modifier: -|...| 2720 // 2721 // In all other cases "-" is handled as a part 2722 // of an expression that follows the sign. 
2723 // 2724 // Note: When "-" is followed by an integer literal, 2725 // this is interpreted as integer negation rather 2726 // than a floating-point NEG modifier applied to N. 2727 // Besides being counter-intuitive, such use of a floating-point 2728 // NEG modifier would have resulted in a different meaning 2729 // of integer literals used with VOP1/2/C and VOP3, 2730 // for example: 2731 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2732 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2733 // Negative fp literals with a preceding "-" are 2734 // handled likewise for uniformity. 2735 // 2736 bool 2737 AMDGPUAsmParser::parseSP3NegModifier() { 2738 2739 AsmToken NextToken[2]; 2740 peekTokens(NextToken); 2741 2742 if (isToken(AsmToken::Minus) && 2743 (isRegister(NextToken[0], NextToken[1]) || 2744 NextToken[0].is(AsmToken::Pipe) || 2745 isId(NextToken[0], "abs"))) { 2746 lex(); 2747 return true; 2748 } 2749 2750 return false; 2751 } 2752 2753 OperandMatchResultTy 2754 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2755 bool AllowImm) { 2756 bool Neg, SP3Neg; 2757 bool Abs, SP3Abs; 2758 SMLoc Loc; 2759 2760 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2761 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2762 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2763 return MatchOperand_ParseFail; 2764 } 2765 2766 SP3Neg = parseSP3NegModifier(); 2767 2768 Loc = getLoc(); 2769 Neg = trySkipId("neg"); 2770 if (Neg && SP3Neg) { 2771 Error(Loc, "expected register or immediate"); 2772 return MatchOperand_ParseFail; 2773 } 2774 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2775 return MatchOperand_ParseFail; 2776 2777 Abs = trySkipId("abs"); 2778 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2779 return MatchOperand_ParseFail; 2780 2781 Loc = getLoc(); 2782 SP3Abs = trySkipToken(AsmToken::Pipe); 2783 if (Abs && SP3Abs) { 2784 Error(Loc, "expected register or immediate"); 2785 return MatchOperand_ParseFail; 2786 } 2787 2788 OperandMatchResultTy Res; 2789 if (AllowImm) { 2790 Res = parseRegOrImm(Operands, SP3Abs); 2791 } else { 2792 Res = parseReg(Operands); 2793 } 2794 if (Res != MatchOperand_Success) { 2795 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2796 } 2797 2798 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2799 return MatchOperand_ParseFail; 2800 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2801 return MatchOperand_ParseFail; 2802 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2803 return MatchOperand_ParseFail; 2804 2805 AMDGPUOperand::Modifiers Mods; 2806 Mods.Abs = Abs || SP3Abs; 2807 Mods.Neg = Neg || SP3Neg; 2808 2809 if (Mods.hasFPModifiers()) { 2810 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2811 if (Op.isExpr()) { 2812 Error(Op.getStartLoc(), "expected an absolute expression"); 2813 return MatchOperand_ParseFail; 2814 } 2815 Op.setModifiers(Mods); 2816 } 2817 return MatchOperand_Success; 2818 } 2819 2820 OperandMatchResultTy 2821 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2822 bool AllowImm) { 2823 bool Sext = trySkipId("sext"); 2824 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2825 return MatchOperand_ParseFail; 2826 2827 OperandMatchResultTy Res; 2828 if (AllowImm) { 2829 Res = parseRegOrImm(Operands); 2830 } else { 2831 Res = parseReg(Operands); 2832 } 2833 if (Res != MatchOperand_Success) { 2834 return Sext? MatchOperand_ParseFail : Res; 2835 } 2836 2837 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2838 return MatchOperand_ParseFail; 2839 2840 AMDGPUOperand::Modifiers Mods; 2841 Mods.Sext = Sext; 2842 2843 if (Mods.hasIntModifiers()) { 2844 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2845 if (Op.isExpr()) { 2846 Error(Op.getStartLoc(), "expected an absolute expression"); 2847 return MatchOperand_ParseFail; 2848 } 2849 Op.setModifiers(Mods); 2850 } 2851 2852 return MatchOperand_Success; 2853 } 2854 2855 OperandMatchResultTy 2856 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2857 return parseRegOrImmWithFPInputMods(Operands, false); 2858 } 2859 2860 OperandMatchResultTy 2861 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2862 return parseRegOrImmWithIntInputMods(Operands, false); 2863 } 2864 2865 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2866 auto Loc = getLoc(); 2867 if (trySkipId("off")) { 2868 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2869 AMDGPUOperand::ImmTyOff, false)); 2870 return MatchOperand_Success; 2871 } 2872 2873 if (!isRegister()) 2874 return MatchOperand_NoMatch; 2875 2876 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2877 if (Reg) { 2878 Operands.push_back(std::move(Reg)); 2879 return MatchOperand_Success; 2880 } 2881 2882 return MatchOperand_ParseFail; 2883 2884 } 2885 2886 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2887 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2888 2889 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2890 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2891 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2892 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2893 return Match_InvalidOperand; 2894 2895 if ((TSFlags & SIInstrFlags::VOP3) && 2896 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2897 getForcedEncodingSize() != 64) 2898 return Match_PreferE32; 2899 2900 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2901 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2902 // v_mac_f32/16 allow only dst_sel == DWORD; 2903 auto OpNum = 2904 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2905 const auto &Op = Inst.getOperand(OpNum); 2906 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2907 return Match_InvalidOperand; 2908 } 2909 } 2910 2911 return Match_Success; 2912 } 2913 2914 static ArrayRef<unsigned> getAllVariants() { 2915 static const unsigned Variants[] = { 2916 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2917 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2918 }; 2919 2920 return makeArrayRef(Variants); 2921 } 2922 2923 // What asm variants we should check 2924 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2925 if (getForcedEncodingSize() == 32) { 2926 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2927 return makeArrayRef(Variants); 2928 } 2929 2930 if (isForcedVOP3()) { 2931 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2932 return makeArrayRef(Variants); 2933 } 2934 2935 if (isForcedSDWA()) { 2936 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2937 AMDGPUAsmVariants::SDWA9}; 2938 return makeArrayRef(Variants); 2939 } 2940 2941 if (isForcedDPP()) { 2942 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2943 return makeArrayRef(Variants); 2944 } 2945 2946 return getAllVariants(); 2947 } 2948 2949 StringRef AMDGPUAsmParser::getMatchedVariantName() const { 2950 if (getForcedEncodingSize() == 32) 2951 return "e32"; 2952 2953 if (isForcedVOP3()) 2954 return "e64"; 2955 2956 if (isForcedSDWA()) 2957 return "sdwa"; 2958 2959 if (isForcedDPP()) 2960 return "dpp"; 2961 2962 return ""; 2963 } 2964 2965 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2966 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2967 const unsigned Num = Desc.getNumImplicitUses(); 2968 for (unsigned i = 0; i < Num; ++i) { 2969 unsigned Reg = Desc.ImplicitUses[i]; 2970 switch (Reg) { 2971 case AMDGPU::FLAT_SCR: 2972 case AMDGPU::VCC: 2973 case AMDGPU::VCC_LO: 2974 case AMDGPU::VCC_HI: 2975 case AMDGPU::M0: 2976 return Reg; 2977 default: 2978 break; 2979 } 2980 } 2981 return AMDGPU::NoRegister; 2982 } 2983 2984 // NB: This code is correct only when used to check constant 2985 // bus limitations because GFX7 support no f16 inline constants. 2986 // Note that there are no cases when a GFX7 opcode violates 2987 // constant bus limitations due to the use of an f16 constant. 
2988 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2989 unsigned OpIdx) const { 2990 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2991 2992 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2993 return false; 2994 } 2995 2996 const MCOperand &MO = Inst.getOperand(OpIdx); 2997 2998 int64_t Val = MO.getImm(); 2999 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 3000 3001 switch (OpSize) { // expected operand size 3002 case 8: 3003 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 3004 case 4: 3005 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 3006 case 2: { 3007 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 3008 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 3009 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 3010 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 3011 return AMDGPU::isInlinableIntLiteral(Val); 3012 3013 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 3014 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 3015 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 3016 return AMDGPU::isInlinableIntLiteralV216(Val); 3017 3018 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 3019 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 3020 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 3021 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 3022 3023 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 3024 } 3025 default: 3026 llvm_unreachable("invalid operand size"); 3027 } 3028 } 3029 3030 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 3031 if (!isGFX10Plus()) 3032 return 1; 3033 3034 switch (Opcode) { 3035 // 64-bit shift instructions can use only one scalar value input 3036 case AMDGPU::V_LSHLREV_B64: 3037 case AMDGPU::V_LSHLREV_B64_gfx10: 3038 case AMDGPU::V_LSHL_B64: 3039 case AMDGPU::V_LSHRREV_B64: 3040 case AMDGPU::V_LSHRREV_B64_gfx10: 3041 case AMDGPU::V_LSHR_B64: 3042 case AMDGPU::V_ASHRREV_I64: 3043 case AMDGPU::V_ASHRREV_I64_gfx10: 3044 case AMDGPU::V_ASHR_I64: 3045 return 1; 3046 default: 3047 return 2; 3048 } 3049 } 3050 3051 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 3052 const MCOperand &MO = Inst.getOperand(OpIdx); 3053 if (MO.isImm()) { 3054 return !isInlineConstant(Inst, OpIdx); 3055 } else if (MO.isReg()) { 3056 auto Reg = MO.getReg(); 3057 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3058 auto PReg = mc2PseudoReg(Reg); 3059 return isSGPR(PReg, TRI) && PReg != SGPR_NULL; 3060 } else { 3061 return true; 3062 } 3063 } 3064 3065 bool 3066 AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst, 3067 const OperandVector &Operands) { 3068 const unsigned Opcode = Inst.getOpcode(); 3069 const MCInstrDesc &Desc = MII.get(Opcode); 3070 unsigned LastSGPR = AMDGPU::NoRegister; 3071 unsigned ConstantBusUseCount = 0; 3072 unsigned NumLiterals = 0; 3073 unsigned LiteralSize; 3074 3075 if (Desc.TSFlags & 3076 (SIInstrFlags::VOPC | 3077 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 3078 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 3079 SIInstrFlags::SDWA)) { 3080 // Check special imm operands (used by madmk, etc) 3081 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 3082 ++ConstantBusUseCount; 3083 } 3084 3085 SmallDenseSet<unsigned> SGPRsUsed; 3086 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 3087 if (SGPRUsed != AMDGPU::NoRegister) { 3088 SGPRsUsed.insert(SGPRUsed); 3089 ++ConstantBusUseCount; 3090 } 3091 3092 const int Src0Idx = 
AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3093 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3094 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3095 3096 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3097 3098 for (int OpIdx : OpIndices) { 3099 if (OpIdx == -1) break; 3100 3101 const MCOperand &MO = Inst.getOperand(OpIdx); 3102 if (usesConstantBus(Inst, OpIdx)) { 3103 if (MO.isReg()) { 3104 LastSGPR = mc2PseudoReg(MO.getReg()); 3105 // Pairs of registers with a partial intersections like these 3106 // s0, s[0:1] 3107 // flat_scratch_lo, flat_scratch 3108 // flat_scratch_lo, flat_scratch_hi 3109 // are theoretically valid but they are disabled anyway. 3110 // Note that this code mimics SIInstrInfo::verifyInstruction 3111 if (!SGPRsUsed.count(LastSGPR)) { 3112 SGPRsUsed.insert(LastSGPR); 3113 ++ConstantBusUseCount; 3114 } 3115 } else { // Expression or a literal 3116 3117 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3118 continue; // special operand like VINTERP attr_chan 3119 3120 // An instruction may use only one literal. 3121 // This has been validated on the previous step. 3122 // See validateVOP3Literal. 3123 // This literal may be used as more than one operand. 3124 // If all these operands are of the same size, 3125 // this literal counts as one scalar value. 3126 // Otherwise it counts as 2 scalar values. 3127 // See "GFX10 Shader Programming", section 3.6.2.3. 3128 3129 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3130 if (Size < 4) Size = 4; 3131 3132 if (NumLiterals == 0) { 3133 NumLiterals = 1; 3134 LiteralSize = Size; 3135 } else if (LiteralSize != Size) { 3136 NumLiterals = 2; 3137 } 3138 } 3139 } 3140 } 3141 } 3142 ConstantBusUseCount += NumLiterals; 3143 3144 if (ConstantBusUseCount <= getConstantBusLimit(Opcode)) 3145 return true; 3146 3147 SMLoc LitLoc = getLitLoc(Operands); 3148 SMLoc RegLoc = getRegLoc(LastSGPR, Operands); 3149 SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? 
RegLoc : LitLoc; 3150 Error(Loc, "invalid operand (violates constant bus restrictions)"); 3151 return false; 3152 } 3153 3154 bool 3155 AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst, 3156 const OperandVector &Operands) { 3157 const unsigned Opcode = Inst.getOpcode(); 3158 const MCInstrDesc &Desc = MII.get(Opcode); 3159 3160 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3161 if (DstIdx == -1 || 3162 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3163 return true; 3164 } 3165 3166 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3167 3168 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3169 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3170 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3171 3172 assert(DstIdx != -1); 3173 const MCOperand &Dst = Inst.getOperand(DstIdx); 3174 assert(Dst.isReg()); 3175 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3176 3177 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3178 3179 for (int SrcIdx : SrcIndices) { 3180 if (SrcIdx == -1) break; 3181 const MCOperand &Src = Inst.getOperand(SrcIdx); 3182 if (Src.isReg()) { 3183 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3184 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3185 Error(getRegLoc(SrcReg, Operands), 3186 "destination must be different than all sources"); 3187 return false; 3188 } 3189 } 3190 } 3191 3192 return true; 3193 } 3194 3195 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3196 3197 const unsigned Opc = Inst.getOpcode(); 3198 const MCInstrDesc &Desc = MII.get(Opc); 3199 3200 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3201 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3202 assert(ClampIdx != -1); 3203 return Inst.getOperand(ClampIdx).getImm() == 0; 3204 } 3205 3206 return true; 3207 } 3208 3209 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3210 3211 const unsigned Opc = Inst.getOpcode(); 3212 const MCInstrDesc &Desc = MII.get(Opc); 3213 3214 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3215 return true; 3216 3217 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3218 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3219 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3220 3221 assert(VDataIdx != -1); 3222 3223 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3224 return true; 3225 3226 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3227 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3228 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3229 if (DMask == 0) 3230 DMask = 1; 3231 3232 unsigned DataSize = 3233 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 3234 if (hasPackedD16()) { 3235 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3236 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3237 DataSize = (DataSize + 1) / 2; 3238 } 3239 3240 return (VDataSize / 4) == DataSize + TFESize; 3241 } 3242 3243 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3244 const unsigned Opc = Inst.getOpcode(); 3245 const MCInstrDesc &Desc = MII.get(Opc); 3246 3247 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus()) 3248 return true; 3249 3250 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3251 3252 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3253 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3254 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3255 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3256 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3257 3258 assert(VAddr0Idx != -1); 3259 assert(SrsrcIdx != -1); 3260 assert(SrsrcIdx > VAddr0Idx); 3261 3262 if (DimIdx == -1) 3263 return true; // intersect_ray 3264 3265 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3266 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3267 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3268 unsigned VAddrSize = 3269 IsNSA ? SrsrcIdx - VAddr0Idx 3270 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3271 3272 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3273 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3274 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3275 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3276 if (!IsNSA) { 3277 if (AddrSize > 8) 3278 AddrSize = 16; 3279 else if (AddrSize > 4) 3280 AddrSize = 8; 3281 } 3282 3283 return VAddrSize == AddrSize; 3284 } 3285 3286 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3287 3288 const unsigned Opc = Inst.getOpcode(); 3289 const MCInstrDesc &Desc = MII.get(Opc); 3290 3291 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3292 return true; 3293 if (!Desc.mayLoad() || !Desc.mayStore()) 3294 return true; // Not atomic 3295 3296 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3297 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3298 3299 // This is an incomplete check because image_atomic_cmpswap 3300 // may only use 0x3 and 0xf while other atomic operations 3301 // may use 0x1 and 0x3. However these limitations are 3302 // verified when we check that dmask matches dst size. 3303 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3304 } 3305 3306 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3307 3308 const unsigned Opc = Inst.getOpcode(); 3309 const MCInstrDesc &Desc = MII.get(Opc); 3310 3311 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3312 return true; 3313 3314 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3315 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3316 3317 // GATHER4 instructions use dmask in a different fashion compared to 3318 // other MIMG instructions. The only useful DMASK values are 3319 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3320 // (red,red,red,red) etc.) The ISA document doesn't mention 3321 // this. 
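// e.g. dmask:0x2 selects the green component; any dmask value with more
// than one bit set is rejected here.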
3322 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3323 } 3324 3325 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3326 { 3327 switch (Opcode) { 3328 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3329 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3330 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3331 return true; 3332 default: 3333 return false; 3334 } 3335 } 3336 3337 // movrels* opcodes should only allow VGPRS as src0. 3338 // This is specified in .td description for vop1/vop3, 3339 // but sdwa is handled differently. See isSDWAOperand. 3340 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst, 3341 const OperandVector &Operands) { 3342 3343 const unsigned Opc = Inst.getOpcode(); 3344 const MCInstrDesc &Desc = MII.get(Opc); 3345 3346 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3347 return true; 3348 3349 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3350 assert(Src0Idx != -1); 3351 3352 SMLoc ErrLoc; 3353 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3354 if (Src0.isReg()) { 3355 auto Reg = mc2PseudoReg(Src0.getReg()); 3356 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3357 if (!isSGPR(Reg, TRI)) 3358 return true; 3359 ErrLoc = getRegLoc(Reg, Operands); 3360 } else { 3361 ErrLoc = getConstLoc(Operands); 3362 } 3363 3364 Error(ErrLoc, "source operand must be a VGPR"); 3365 return false; 3366 } 3367 3368 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst, 3369 const OperandVector &Operands) { 3370 3371 const unsigned Opc = Inst.getOpcode(); 3372 3373 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3374 return true; 3375 3376 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3377 assert(Src0Idx != -1); 3378 3379 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3380 if (!Src0.isReg()) 3381 return true; 3382 3383 auto Reg = mc2PseudoReg(Src0.getReg()); 3384 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3385 if (isSGPR(Reg, TRI)) { 3386 Error(getRegLoc(Reg, Operands), 3387 "source operand must be either a VGPR or an inline constant"); 3388 return false; 3389 } 3390 3391 return true; 3392 } 3393 3394 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) { 3395 switch (Inst.getOpcode()) { 3396 default: 3397 return true; 3398 case V_DIV_SCALE_F32_gfx6_gfx7: 3399 case V_DIV_SCALE_F32_vi: 3400 case V_DIV_SCALE_F32_gfx10: 3401 case V_DIV_SCALE_F64_gfx6_gfx7: 3402 case V_DIV_SCALE_F64_vi: 3403 case V_DIV_SCALE_F64_gfx10: 3404 break; 3405 } 3406 3407 // TODO: Check that src0 = src1 or src2. 
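// ABS source modifiers are not allowed here; e.g. (illustrative syntax)
// "v_div_scale_f32 v0, vcc, |v1|, v2, v3" would be rejected by the loop below.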
3408 3409 for (auto Name : {AMDGPU::OpName::src0_modifiers, 3410 AMDGPU::OpName::src1_modifiers, 3411 AMDGPU::OpName::src2_modifiers}) { 3412 if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name)) 3413 .getImm() & 3414 SISrcMods::ABS) { 3415 return false; 3416 } 3417 } 3418 3419 return true; 3420 } 3421 3422 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3423 3424 const unsigned Opc = Inst.getOpcode(); 3425 const MCInstrDesc &Desc = MII.get(Opc); 3426 3427 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3428 return true; 3429 3430 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3431 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3432 if (isCI() || isSI()) 3433 return false; 3434 } 3435 3436 return true; 3437 } 3438 3439 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3440 const unsigned Opc = Inst.getOpcode(); 3441 const MCInstrDesc &Desc = MII.get(Opc); 3442 3443 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3444 return true; 3445 3446 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3447 if (DimIdx < 0) 3448 return true; 3449 3450 long Imm = Inst.getOperand(DimIdx).getImm(); 3451 if (Imm < 0 || Imm >= 8) 3452 return false; 3453 3454 return true; 3455 } 3456 3457 static bool IsRevOpcode(const unsigned Opcode) 3458 { 3459 switch (Opcode) { 3460 case AMDGPU::V_SUBREV_F32_e32: 3461 case AMDGPU::V_SUBREV_F32_e64: 3462 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3463 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3464 case AMDGPU::V_SUBREV_F32_e32_vi: 3465 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3466 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3467 case AMDGPU::V_SUBREV_F32_e64_vi: 3468 3469 case AMDGPU::V_SUBREV_CO_U32_e32: 3470 case AMDGPU::V_SUBREV_CO_U32_e64: 3471 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3472 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3473 3474 case AMDGPU::V_SUBBREV_U32_e32: 3475 case AMDGPU::V_SUBBREV_U32_e64: 3476 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3477 case AMDGPU::V_SUBBREV_U32_e32_vi: 3478 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3479 case AMDGPU::V_SUBBREV_U32_e64_vi: 3480 3481 case AMDGPU::V_SUBREV_U32_e32: 3482 case AMDGPU::V_SUBREV_U32_e64: 3483 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3484 case AMDGPU::V_SUBREV_U32_e32_vi: 3485 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3486 case AMDGPU::V_SUBREV_U32_e64_vi: 3487 3488 case AMDGPU::V_SUBREV_F16_e32: 3489 case AMDGPU::V_SUBREV_F16_e64: 3490 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3491 case AMDGPU::V_SUBREV_F16_e32_vi: 3492 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3493 case AMDGPU::V_SUBREV_F16_e64_vi: 3494 3495 case AMDGPU::V_SUBREV_U16_e32: 3496 case AMDGPU::V_SUBREV_U16_e64: 3497 case AMDGPU::V_SUBREV_U16_e32_vi: 3498 case AMDGPU::V_SUBREV_U16_e64_vi: 3499 3500 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3501 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3502 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3503 3504 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3505 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3506 3507 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3508 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3509 3510 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3511 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3512 3513 case AMDGPU::V_LSHRREV_B32_e32: 3514 case AMDGPU::V_LSHRREV_B32_e64: 3515 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3516 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3517 case AMDGPU::V_LSHRREV_B32_e32_vi: 3518 case AMDGPU::V_LSHRREV_B32_e64_vi: 3519 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3520 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3521 3522 case AMDGPU::V_ASHRREV_I32_e32: 3523 case
AMDGPU::V_ASHRREV_I32_e64: 3524 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3525 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3526 case AMDGPU::V_ASHRREV_I32_e32_vi: 3527 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3528 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3529 case AMDGPU::V_ASHRREV_I32_e64_vi: 3530 3531 case AMDGPU::V_LSHLREV_B32_e32: 3532 case AMDGPU::V_LSHLREV_B32_e64: 3533 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3534 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3535 case AMDGPU::V_LSHLREV_B32_e32_vi: 3536 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3537 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3538 case AMDGPU::V_LSHLREV_B32_e64_vi: 3539 3540 case AMDGPU::V_LSHLREV_B16_e32: 3541 case AMDGPU::V_LSHLREV_B16_e64: 3542 case AMDGPU::V_LSHLREV_B16_e32_vi: 3543 case AMDGPU::V_LSHLREV_B16_e64_vi: 3544 case AMDGPU::V_LSHLREV_B16_gfx10: 3545 3546 case AMDGPU::V_LSHRREV_B16_e32: 3547 case AMDGPU::V_LSHRREV_B16_e64: 3548 case AMDGPU::V_LSHRREV_B16_e32_vi: 3549 case AMDGPU::V_LSHRREV_B16_e64_vi: 3550 case AMDGPU::V_LSHRREV_B16_gfx10: 3551 3552 case AMDGPU::V_ASHRREV_I16_e32: 3553 case AMDGPU::V_ASHRREV_I16_e64: 3554 case AMDGPU::V_ASHRREV_I16_e32_vi: 3555 case AMDGPU::V_ASHRREV_I16_e64_vi: 3556 case AMDGPU::V_ASHRREV_I16_gfx10: 3557 3558 case AMDGPU::V_LSHLREV_B64: 3559 case AMDGPU::V_LSHLREV_B64_gfx10: 3560 case AMDGPU::V_LSHLREV_B64_vi: 3561 3562 case AMDGPU::V_LSHRREV_B64: 3563 case AMDGPU::V_LSHRREV_B64_gfx10: 3564 case AMDGPU::V_LSHRREV_B64_vi: 3565 3566 case AMDGPU::V_ASHRREV_I64: 3567 case AMDGPU::V_ASHRREV_I64_gfx10: 3568 case AMDGPU::V_ASHRREV_I64_vi: 3569 3570 case AMDGPU::V_PK_LSHLREV_B16: 3571 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3572 case AMDGPU::V_PK_LSHLREV_B16_vi: 3573 3574 case AMDGPU::V_PK_LSHRREV_B16: 3575 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3576 case AMDGPU::V_PK_LSHRREV_B16_vi: 3577 case AMDGPU::V_PK_ASHRREV_I16: 3578 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3579 case AMDGPU::V_PK_ASHRREV_I16_vi: 3580 return true; 3581 default: 3582 return false; 3583 } 3584 } 3585 3586 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3587 3588 using namespace SIInstrFlags; 3589 const unsigned Opcode = Inst.getOpcode(); 3590 const MCInstrDesc &Desc = MII.get(Opcode); 3591 3592 // lds_direct register is defined so that it can be used 3593 // with 9-bit operands only. Ignore encodings which do not accept these. 3594 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3595 return true; 3596 3597 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3598 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3599 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3600 3601 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3602 3603 // lds_direct cannot be specified as either src1 or src2. 3604 for (int SrcIdx : SrcIndices) { 3605 if (SrcIdx == -1) break; 3606 const MCOperand &Src = Inst.getOperand(SrcIdx); 3607 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3608 return false; 3609 } 3610 } 3611 3612 if (Src0Idx == -1) 3613 return true; 3614 3615 const MCOperand &Src = Inst.getOperand(Src0Idx); 3616 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3617 return true; 3618 3619 // lds_direct is specified as src0. Check additional limitations. 
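// Even when used as src0, lds_direct is still rejected for SDWA encodings and
// for the "reverse" opcodes enumerated in IsRevOpcode() above (where what is
// written as src0 is logically the second source operand).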
3620 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3621 } 3622 3623 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3624 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3625 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3626 if (Op.isFlatOffset()) 3627 return Op.getStartLoc(); 3628 } 3629 return getLoc(); 3630 } 3631 3632 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3633 const OperandVector &Operands) { 3634 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3635 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3636 return true; 3637 3638 auto Opcode = Inst.getOpcode(); 3639 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3640 assert(OpNum != -1); 3641 3642 const auto &Op = Inst.getOperand(OpNum); 3643 if (!hasFlatOffsets() && Op.getImm() != 0) { 3644 Error(getFlatOffsetLoc(Operands), 3645 "flat offset modifier is not supported on this GPU"); 3646 return false; 3647 } 3648 3649 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3650 // For FLAT segment the offset must be positive; 3651 // MSB is ignored and forced to zero. 3652 unsigned OffsetSize = isGFX9() ? 13 : 12; 3653 if (TSFlags & (SIInstrFlags::IsFlatGlobal | SIInstrFlags::IsFlatScratch)) { 3654 if (!isIntN(OffsetSize, Op.getImm())) { 3655 Error(getFlatOffsetLoc(Operands), 3656 isGFX9() ? "expected a 13-bit signed offset" : 3657 "expected a 12-bit signed offset"); 3658 return false; 3659 } 3660 } else { 3661 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3662 Error(getFlatOffsetLoc(Operands), 3663 isGFX9() ? "expected a 12-bit unsigned offset" : 3664 "expected an 11-bit unsigned offset"); 3665 return false; 3666 } 3667 } 3668 3669 return true; 3670 } 3671 3672 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3673 // Start with second operand because SMEM Offset cannot be dst or src0. 3674 for (unsigned i = 2, e = Operands.size(); i != e; ++i) { 3675 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3676 if (Op.isSMEMOffset()) 3677 return Op.getStartLoc(); 3678 } 3679 return getLoc(); 3680 } 3681 3682 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3683 const OperandVector &Operands) { 3684 if (isCI() || isSI()) 3685 return true; 3686 3687 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3688 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3689 return true; 3690 3691 auto Opcode = Inst.getOpcode(); 3692 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3693 if (OpNum == -1) 3694 return true; 3695 3696 const auto &Op = Inst.getOperand(OpNum); 3697 if (!Op.isImm()) 3698 return true; 3699 3700 uint64_t Offset = Op.getImm(); 3701 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3702 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3703 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3704 return true; 3705 3706 Error(getSMEMOffsetLoc(Operands), 3707 (isVI() || IsBuffer) ? 
"expected a 20-bit unsigned offset" : 3708 "expected a 21-bit signed offset"); 3709 3710 return false; 3711 } 3712 3713 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3714 unsigned Opcode = Inst.getOpcode(); 3715 const MCInstrDesc &Desc = MII.get(Opcode); 3716 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3717 return true; 3718 3719 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3720 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3721 3722 const int OpIndices[] = { Src0Idx, Src1Idx }; 3723 3724 unsigned NumExprs = 0; 3725 unsigned NumLiterals = 0; 3726 uint32_t LiteralValue; 3727 3728 for (int OpIdx : OpIndices) { 3729 if (OpIdx == -1) break; 3730 3731 const MCOperand &MO = Inst.getOperand(OpIdx); 3732 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3733 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3734 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3735 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3736 if (NumLiterals == 0 || LiteralValue != Value) { 3737 LiteralValue = Value; 3738 ++NumLiterals; 3739 } 3740 } else if (MO.isExpr()) { 3741 ++NumExprs; 3742 } 3743 } 3744 } 3745 3746 return NumLiterals + NumExprs <= 1; 3747 } 3748 3749 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3750 const unsigned Opc = Inst.getOpcode(); 3751 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3752 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3753 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3754 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3755 3756 if (OpSel & ~3) 3757 return false; 3758 } 3759 return true; 3760 } 3761 3762 // Check if VCC register matches wavefront size 3763 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3764 auto FB = getFeatureBits(); 3765 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3766 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3767 } 3768 3769 // VOP3 literal is only allowed in GFX10+ and only one can be used 3770 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst, 3771 const OperandVector &Operands) { 3772 unsigned Opcode = Inst.getOpcode(); 3773 const MCInstrDesc &Desc = MII.get(Opcode); 3774 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3775 return true; 3776 3777 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3778 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3779 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3780 3781 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3782 3783 unsigned NumExprs = 0; 3784 unsigned NumLiterals = 0; 3785 uint32_t LiteralValue; 3786 3787 for (int OpIdx : OpIndices) { 3788 if (OpIdx == -1) break; 3789 3790 const MCOperand &MO = Inst.getOperand(OpIdx); 3791 if (!MO.isImm() && !MO.isExpr()) 3792 continue; 3793 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3794 continue; 3795 3796 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3797 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) { 3798 Error(getConstLoc(Operands), 3799 "inline constants are not allowed for this operand"); 3800 return false; 3801 } 3802 3803 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3804 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3805 if (NumLiterals == 0 || LiteralValue != Value) { 3806 LiteralValue = Value; 3807 ++NumLiterals; 3808 } 3809 } else if (MO.isExpr()) { 3810 ++NumExprs; 3811 } 
3812 } 3813 NumLiterals += NumExprs; 3814 3815 if (!NumLiterals) 3816 return true; 3817 3818 if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) { 3819 Error(getLitLoc(Operands), "literal operands are not supported"); 3820 return false; 3821 } 3822 3823 if (NumLiterals > 1) { 3824 Error(getLitLoc(Operands), "only one literal operand is allowed"); 3825 return false; 3826 } 3827 3828 return true; 3829 } 3830 3831 bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst, 3832 const OperandVector &Operands, 3833 const SMLoc &IDLoc) { 3834 int GLCPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), 3835 AMDGPU::OpName::glc1); 3836 if (GLCPos != -1) { 3837 // -1 is set by GLC_1 default operand. In all cases "glc" must be present 3838 // in the asm string, and the default value means it is not present. 3839 if (Inst.getOperand(GLCPos).getImm() == -1) { 3840 Error(IDLoc, "instruction must use glc"); 3841 return false; 3842 } 3843 } 3844 3845 return true; 3846 } 3847 3848 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3849 const SMLoc &IDLoc, 3850 const OperandVector &Operands) { 3851 if (!validateLdsDirect(Inst)) { 3852 Error(getRegLoc(AMDGPU::LDS_DIRECT, Operands), 3853 "invalid use of lds_direct"); 3854 return false; 3855 } 3856 if (!validateSOPLiteral(Inst)) { 3857 Error(getLitLoc(Operands), 3858 "only one literal operand is allowed"); 3859 return false; 3860 } 3861 if (!validateVOP3Literal(Inst, Operands)) { 3862 return false; 3863 } 3864 if (!validateConstantBusLimitations(Inst, Operands)) { 3865 return false; 3866 } 3867 if (!validateEarlyClobberLimitations(Inst, Operands)) { 3868 return false; 3869 } 3870 if (!validateIntClampSupported(Inst)) { 3871 Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands), 3872 "integer clamping is not supported on this GPU"); 3873 return false; 3874 } 3875 if (!validateOpSel(Inst)) { 3876 Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands), 3877 "invalid op_sel operand"); 3878 return false; 3879 } 3880 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 
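// MIMG d16 (16-bit image data) variants only exist on VI and later;
// validateMIMGD16() rejects d16 on SI/CI.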
3881 if (!validateMIMGD16(Inst)) { 3882 Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands), 3883 "d16 modifier is not supported on this GPU"); 3884 return false; 3885 } 3886 if (!validateMIMGDim(Inst)) { 3887 Error(IDLoc, "dim modifier is required on this GPU"); 3888 return false; 3889 } 3890 if (!validateMIMGDataSize(Inst)) { 3891 Error(IDLoc, 3892 "image data size does not match dmask and tfe"); 3893 return false; 3894 } 3895 if (!validateMIMGAddrSize(Inst)) { 3896 Error(IDLoc, 3897 "image address size does not match dim and a16"); 3898 return false; 3899 } 3900 if (!validateMIMGAtomicDMask(Inst)) { 3901 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3902 "invalid atomic image dmask"); 3903 return false; 3904 } 3905 if (!validateMIMGGatherDMask(Inst)) { 3906 Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands), 3907 "invalid image_gather dmask: only one bit must be set"); 3908 return false; 3909 } 3910 if (!validateMovrels(Inst, Operands)) { 3911 return false; 3912 } 3913 if (!validateFlatOffset(Inst, Operands)) { 3914 return false; 3915 } 3916 if (!validateSMEMOffset(Inst, Operands)) { 3917 return false; 3918 } 3919 if (!validateMAIAccWrite(Inst, Operands)) { 3920 return false; 3921 } 3922 if (!validateDivScale(Inst)) { 3923 Error(IDLoc, "ABS not allowed in VOP3B instructions"); 3924 return false; 3925 } 3926 if (!validateCoherencyBits(Inst, Operands, IDLoc)) { 3927 return false; 3928 } 3929 3930 return true; 3931 } 3932 3933 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3934 const FeatureBitset &FBS, 3935 unsigned VariantID = 0); 3936 3937 static bool AMDGPUCheckMnemonic(StringRef Mnemonic, 3938 const FeatureBitset &AvailableFeatures, 3939 unsigned VariantID); 3940 3941 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3942 const FeatureBitset &FBS) { 3943 return isSupportedMnemo(Mnemo, FBS, getAllVariants()); 3944 } 3945 3946 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo, 3947 const FeatureBitset &FBS, 3948 ArrayRef<unsigned> Variants) { 3949 for (auto Variant : Variants) { 3950 if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant)) 3951 return true; 3952 } 3953 3954 return false; 3955 } 3956 3957 bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo, 3958 const SMLoc &IDLoc) { 3959 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3960 3961 // Check if requested instruction variant is supported. 3962 if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants())) 3963 return false; 3964 3965 // This instruction is not supported. 3966 // Clear any other pending errors because they are no longer relevant. 3967 getParser().clearPendingErrors(); 3968 3969 // Requested instruction variant is not supported. 3970 // Check if any other variants are supported. 3971 StringRef VariantName = getMatchedVariantName(); 3972 if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) { 3973 return Error(IDLoc, 3974 Twine(VariantName, 3975 " variant of this instruction is not supported")); 3976 } 3977 3978 // Finally check if this instruction is supported on any other GPU. 3979 if (isSupportedMnemo(Mnemo, FeatureBitset().set())) { 3980 return Error(IDLoc, "instruction not supported on this GPU"); 3981 } 3982 3983 // Instruction not supported on any GPU. Probably a typo. 
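// As a last resort, run the tablegen'd mnemonic spell checker; if it finds a
// close match it may append a ", did you mean ..." style suggestion to the
// "invalid instruction" diagnostic.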
3984 std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS); 3985 return Error(IDLoc, "invalid instruction" + Suggestion); 3986 }
3987 3988 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3989 OperandVector &Operands, 3990 MCStreamer &Out, 3991 uint64_t &ErrorInfo, 3992 bool MatchingInlineAsm) { 3993 MCInst Inst; 3994 unsigned Result = Match_Success; 3995 for (auto Variant : getMatchedVariants()) { 3996 uint64_t EI; 3997 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3998 Variant);
3999 // We order match statuses from least to most specific and use the most 4000 // specific status as the result: 4001 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
4002 if ((R == Match_Success) || 4003 (R == Match_PreferE32) || 4004 (R == Match_MissingFeature && Result != Match_PreferE32) || 4005 (R == Match_InvalidOperand && Result != Match_MissingFeature 4006 && Result != Match_PreferE32) || 4007 (R == Match_MnemonicFail && Result != Match_InvalidOperand 4008 && Result != Match_MissingFeature 4009 && Result != Match_PreferE32)) { 4010 Result = R; 4011 ErrorInfo = EI; 4012 } 4013 if (R == Match_Success) 4014 break; 4015 }
4016 4017 if (Result == Match_Success) { 4018 if (!validateInstruction(Inst, IDLoc, Operands)) { 4019 return true; 4020 } 4021 Inst.setLoc(IDLoc); 4022 Out.emitInstruction(Inst, getSTI()); 4023 return false; 4024 }
4025 4026 StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken(); 4027 if (checkUnsupportedInstruction(Mnemo, IDLoc)) { 4028 return true; 4029 }
4030 4031 switch (Result) { 4032 default: break; 4033 case Match_MissingFeature: 4034 // It has been verified that the specified instruction 4035 // mnemonic is valid. A match was found but it requires 4036 // features which are not supported on this GPU.
4037 return Error(IDLoc, "operands are not valid for this GPU or mode"); 4038 4039 case Match_InvalidOperand: { 4040 SMLoc ErrorLoc = IDLoc; 4041 if (ErrorInfo != ~0ULL) { 4042 if (ErrorInfo >= Operands.size()) { 4043 return Error(getLoc(), "too few operands for instruction"); 4044 } 4045 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 4046 if (ErrorLoc == SMLoc()) 4047 ErrorLoc = IDLoc; 4048 } 4049 return Error(ErrorLoc, "invalid operand for instruction"); 4050 } 4051 4052 case Match_PreferE32: 4053 return Error(IDLoc, "internal error: instruction without _e64 suffix " 4054 "should be encoded as e32"); 4055 case Match_MnemonicFail: 4056 llvm_unreachable("Invalid instructions should have been handled already"); 4057 } 4058 llvm_unreachable("Implement any new match types added!"); 4059 } 4060 4061 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 4062 int64_t Tmp = -1; 4063 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 4064 return true; 4065 } 4066 if (getParser().parseAbsoluteExpression(Tmp)) { 4067 return true; 4068 } 4069 Ret = static_cast<uint32_t>(Tmp); 4070 return false; 4071 } 4072 4073 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 4074 uint32_t &Minor) { 4075 if (ParseAsAbsoluteExpression(Major)) 4076 return TokError("invalid major version"); 4077 4078 if (getLexer().isNot(AsmToken::Comma)) 4079 return TokError("minor version number required, comma expected"); 4080 Lex(); 4081 4082 if (ParseAsAbsoluteExpression(Minor)) 4083 return TokError("invalid minor version"); 4084 4085 return false; 4086 } 4087 4088 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 4089 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4090 return TokError("directive only supported for amdgcn architecture"); 4091 4092 std::string Target; 4093 4094 SMLoc TargetStart = getTok().getLoc(); 4095 if (getParser().parseEscapedString(Target)) 4096 return true; 4097 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 4098 4099 std::string ExpectedTarget; 4100 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 4101 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 4102 4103 if (Target != ExpectedTargetOS.str()) 4104 return getParser().Error(TargetRange.Start, "target must match options", 4105 TargetRange); 4106 4107 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 4108 return false; 4109 } 4110 4111 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 4112 return getParser().Error(Range.Start, "value out of range", Range); 4113 } 4114 4115 bool AMDGPUAsmParser::calculateGPRBlocks( 4116 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 4117 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 4118 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 4119 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 4120 // TODO(scott.linder): These calculations are duplicated from 4121 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 
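// Compute the granulated VGPR/SGPR block counts that go into
// COMPUTE_PGM_RSRC1. On GFX10+ the granulated SGPR count field is no longer
// used, so NumSGPRs is forced to zero below.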
4122 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 4123 4124 unsigned NumVGPRs = NextFreeVGPR; 4125 unsigned NumSGPRs = NextFreeSGPR; 4126 4127 if (Version.Major >= 10) 4128 NumSGPRs = 0; 4129 else { 4130 unsigned MaxAddressableNumSGPRs = 4131 IsaInfo::getAddressableNumSGPRs(&getSTI()); 4132 4133 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 4134 NumSGPRs > MaxAddressableNumSGPRs) 4135 return OutOfRangeError(SGPRRange); 4136 4137 NumSGPRs += 4138 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 4139 4140 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 4141 NumSGPRs > MaxAddressableNumSGPRs) 4142 return OutOfRangeError(SGPRRange); 4143 4144 if (Features.test(FeatureSGPRInitBug)) 4145 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 4146 } 4147 4148 VGPRBlocks = 4149 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 4150 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 4151 4152 return false; 4153 } 4154 4155 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 4156 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 4157 return TokError("directive only supported for amdgcn architecture"); 4158 4159 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 4160 return TokError("directive only supported for amdhsa OS"); 4161 4162 StringRef KernelName; 4163 if (getParser().parseIdentifier(KernelName)) 4164 return true; 4165 4166 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 4167 4168 StringSet<> Seen; 4169 4170 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 4171 4172 SMRange VGPRRange; 4173 uint64_t NextFreeVGPR = 0; 4174 SMRange SGPRRange; 4175 uint64_t NextFreeSGPR = 0; 4176 unsigned UserSGPRCount = 0; 4177 bool ReserveVCC = true; 4178 bool ReserveFlatScr = true; 4179 bool ReserveXNACK = hasXNACK(); 4180 Optional<bool> EnableWavefrontSize32; 4181 4182 while (true) { 4183 while (getLexer().is(AsmToken::EndOfStatement)) 4184 Lex(); 4185 4186 if (getLexer().isNot(AsmToken::Identifier)) 4187 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 4188 4189 StringRef ID = getTok().getIdentifier(); 4190 SMRange IDRange = getTok().getLocRange(); 4191 Lex(); 4192 4193 if (ID == ".end_amdhsa_kernel") 4194 break; 4195 4196 if (Seen.find(ID) != Seen.end()) 4197 return TokError(".amdhsa_ directives cannot be repeated"); 4198 Seen.insert(ID); 4199 4200 SMLoc ValStart = getTok().getLoc(); 4201 int64_t IVal; 4202 if (getParser().parseAbsoluteExpression(IVal)) 4203 return true; 4204 SMLoc ValEnd = getTok().getLoc(); 4205 SMRange ValRange = SMRange(ValStart, ValEnd); 4206 4207 if (IVal < 0) 4208 return OutOfRangeError(ValRange); 4209 4210 uint64_t Val = IVal; 4211 4212 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 4213 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 4214 return OutOfRangeError(RANGE); \ 4215 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 4216 4217 if (ID == ".amdhsa_group_segment_fixed_size") { 4218 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 4219 return OutOfRangeError(ValRange); 4220 KD.group_segment_fixed_size = Val; 4221 } else if (ID == ".amdhsa_private_segment_fixed_size") { 4222 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 4223 return OutOfRangeError(ValRange); 4224 KD.private_segment_fixed_size = Val; 4225 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 4226 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4227 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 4228 Val, ValRange); 
4229 if (Val) 4230 UserSGPRCount += 4; 4231 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 4232 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4233 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 4234 ValRange); 4235 if (Val) 4236 UserSGPRCount += 2; 4237 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4238 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4239 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4240 ValRange); 4241 if (Val) 4242 UserSGPRCount += 2; 4243 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4244 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4245 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4246 Val, ValRange); 4247 if (Val) 4248 UserSGPRCount += 2; 4249 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4250 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4251 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4252 ValRange); 4253 if (Val) 4254 UserSGPRCount += 2; 4255 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4256 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4257 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4258 ValRange); 4259 if (Val) 4260 UserSGPRCount += 2; 4261 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4262 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4263 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4264 Val, ValRange); 4265 if (Val) 4266 UserSGPRCount += 1; 4267 } else if (ID == ".amdhsa_wavefront_size32") { 4268 if (IVersion.Major < 10) 4269 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4270 IDRange); 4271 EnableWavefrontSize32 = Val; 4272 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4273 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4274 Val, ValRange); 4275 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4276 PARSE_BITS_ENTRY( 4277 KD.compute_pgm_rsrc2, 4278 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4279 ValRange); 4280 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4281 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4282 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4283 ValRange); 4284 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4285 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4286 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4287 ValRange); 4288 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4289 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4290 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4291 ValRange); 4292 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4293 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4294 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4295 ValRange); 4296 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4297 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4298 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4299 ValRange); 4300 } else if (ID == ".amdhsa_next_free_vgpr") { 4301 VGPRRange = ValRange; 4302 NextFreeVGPR = Val; 4303 } else if (ID == ".amdhsa_next_free_sgpr") { 4304 SGPRRange = ValRange; 4305 NextFreeSGPR = Val; 4306 } else if (ID == ".amdhsa_reserve_vcc") { 4307 if (!isUInt<1>(Val)) 4308 return OutOfRangeError(ValRange); 4309 ReserveVCC = Val; 4310 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4311 if (IVersion.Major < 7) 4312 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4313 IDRange); 4314 if (!isUInt<1>(Val)) 4315 return OutOfRangeError(ValRange); 4316 ReserveFlatScr = Val; 4317 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4318 if (IVersion.Major < 8) 4319 return getParser().Error(IDRange.Start, 
"directive requires gfx8+", 4320 IDRange); 4321 if (!isUInt<1>(Val)) 4322 return OutOfRangeError(ValRange); 4323 ReserveXNACK = Val; 4324 } else if (ID == ".amdhsa_float_round_mode_32") { 4325 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4326 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4327 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4328 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4329 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4330 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4331 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4332 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4333 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4334 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4335 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4336 ValRange); 4337 } else if (ID == ".amdhsa_dx10_clamp") { 4338 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4339 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4340 } else if (ID == ".amdhsa_ieee_mode") { 4341 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4342 Val, ValRange); 4343 } else if (ID == ".amdhsa_fp16_overflow") { 4344 if (IVersion.Major < 9) 4345 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4346 IDRange); 4347 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4348 ValRange); 4349 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4350 if (IVersion.Major < 10) 4351 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4352 IDRange); 4353 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4354 ValRange); 4355 } else if (ID == ".amdhsa_memory_ordered") { 4356 if (IVersion.Major < 10) 4357 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4358 IDRange); 4359 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4360 ValRange); 4361 } else if (ID == ".amdhsa_forward_progress") { 4362 if (IVersion.Major < 10) 4363 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4364 IDRange); 4365 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4366 ValRange); 4367 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4368 PARSE_BITS_ENTRY( 4369 KD.compute_pgm_rsrc2, 4370 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4371 ValRange); 4372 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4373 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4374 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4375 Val, ValRange); 4376 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4377 PARSE_BITS_ENTRY( 4378 KD.compute_pgm_rsrc2, 4379 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4380 ValRange); 4381 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4382 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4383 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4384 Val, ValRange); 4385 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4386 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4387 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4388 Val, ValRange); 4389 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4390 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4391 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4392 Val, ValRange); 4393 } else if (ID == ".amdhsa_exception_int_div_zero") { 4394 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4395 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4396 Val, ValRange); 4397 } else { 4398 return getParser().Error(IDRange.Start, 
4399 "unknown .amdhsa_kernel directive", IDRange); 4400 } 4401 4402 #undef PARSE_BITS_ENTRY 4403 } 4404 4405 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4406 return TokError(".amdhsa_next_free_vgpr directive is required"); 4407 4408 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4409 return TokError(".amdhsa_next_free_sgpr directive is required"); 4410 4411 unsigned VGPRBlocks; 4412 unsigned SGPRBlocks; 4413 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4414 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4415 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4416 SGPRBlocks)) 4417 return true; 4418 4419 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4420 VGPRBlocks)) 4421 return OutOfRangeError(VGPRRange); 4422 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4423 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4424 4425 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4426 SGPRBlocks)) 4427 return OutOfRangeError(SGPRRange); 4428 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4429 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4430 SGPRBlocks); 4431 4432 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4433 return TokError("too many user SGPRs enabled"); 4434 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4435 UserSGPRCount); 4436 4437 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4438 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4439 ReserveFlatScr, ReserveXNACK); 4440 return false; 4441 } 4442 4443 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4444 uint32_t Major; 4445 uint32_t Minor; 4446 4447 if (ParseDirectiveMajorMinor(Major, Minor)) 4448 return true; 4449 4450 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4451 return false; 4452 } 4453 4454 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4455 uint32_t Major; 4456 uint32_t Minor; 4457 uint32_t Stepping; 4458 StringRef VendorName; 4459 StringRef ArchName; 4460 4461 // If this directive has no arguments, then use the ISA version for the 4462 // targeted GPU. 
4463 if (getLexer().is(AsmToken::EndOfStatement)) { 4464 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4465 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4466 ISA.Stepping, 4467 "AMD", "AMDGPU"); 4468 return false; 4469 } 4470 4471 if (ParseDirectiveMajorMinor(Major, Minor)) 4472 return true; 4473 4474 if (getLexer().isNot(AsmToken::Comma)) 4475 return TokError("stepping version number required, comma expected"); 4476 Lex(); 4477 4478 if (ParseAsAbsoluteExpression(Stepping)) 4479 return TokError("invalid stepping version"); 4480 4481 if (getLexer().isNot(AsmToken::Comma)) 4482 return TokError("vendor name required, comma expected"); 4483 Lex(); 4484 4485 if (getLexer().isNot(AsmToken::String)) 4486 return TokError("invalid vendor name"); 4487 4488 VendorName = getLexer().getTok().getStringContents(); 4489 Lex(); 4490 4491 if (getLexer().isNot(AsmToken::Comma)) 4492 return TokError("arch name required, comma expected"); 4493 Lex(); 4494 4495 if (getLexer().isNot(AsmToken::String)) 4496 return TokError("invalid arch name"); 4497 4498 ArchName = getLexer().getTok().getStringContents(); 4499 Lex(); 4500 4501 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4502 VendorName, ArchName); 4503 return false; 4504 } 4505 4506 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4507 amd_kernel_code_t &Header) { 4508 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4509 // assembly for backwards compatibility. 4510 if (ID == "max_scratch_backing_memory_byte_size") { 4511 Parser.eatToEndOfStatement(); 4512 return false; 4513 } 4514 4515 SmallString<40> ErrStr; 4516 raw_svector_ostream Err(ErrStr); 4517 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4518 return TokError(Err.str()); 4519 } 4520 Lex(); 4521 4522 if (ID == "enable_wavefront_size32") { 4523 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4524 if (!isGFX10Plus()) 4525 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4526 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4527 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4528 } else { 4529 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4530 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4531 } 4532 } 4533 4534 if (ID == "wavefront_size") { 4535 if (Header.wavefront_size == 5) { 4536 if (!isGFX10Plus()) 4537 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4538 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4539 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4540 } else if (Header.wavefront_size == 6) { 4541 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4542 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4543 } 4544 } 4545 4546 if (ID == "enable_wgp_mode") { 4547 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && 4548 !isGFX10Plus()) 4549 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4550 } 4551 4552 if (ID == "enable_mem_ordered") { 4553 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && 4554 !isGFX10Plus()) 4555 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4556 } 4557 4558 if (ID == "enable_fwd_progress") { 4559 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && 4560 !isGFX10Plus()) 4561 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4562 } 4563 4564 return false; 4565 } 4566 4567 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4568 amd_kernel_code_t Header; 4569 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4570 4571 while (true) { 4572 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4573 // will set the current token to EndOfStatement. 4574 while(getLexer().is(AsmToken::EndOfStatement)) 4575 Lex(); 4576 4577 if (getLexer().isNot(AsmToken::Identifier)) 4578 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4579 4580 StringRef ID = getLexer().getTok().getIdentifier(); 4581 Lex(); 4582 4583 if (ID == ".end_amd_kernel_code_t") 4584 break; 4585 4586 if (ParseAMDKernelCodeTValue(ID, Header)) 4587 return true; 4588 } 4589 4590 getTargetStreamer().EmitAMDKernelCodeT(Header); 4591 4592 return false; 4593 } 4594 4595 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4596 if (getLexer().isNot(AsmToken::Identifier)) 4597 return TokError("expected symbol name"); 4598 4599 StringRef KernelName = Parser.getTok().getString(); 4600 4601 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4602 ELF::STT_AMDGPU_HSA_KERNEL); 4603 Lex(); 4604 4605 KernelScope.initialize(getContext()); 4606 return false; 4607 } 4608 4609 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4610 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4611 return Error(getParser().getTok().getLoc(), 4612 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4613 "architectures"); 4614 } 4615 4616 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4617 4618 std::string ISAVersionStringFromSTI; 4619 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4620 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4621 4622 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4623 return Error(getParser().getTok().getLoc(), 4624 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4625 "arguments specified through the command line"); 4626 } 4627 4628 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4629 Lex(); 4630 4631 return false; 4632 } 4633 4634 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4635 const char *AssemblerDirectiveBegin; 4636 const char *AssemblerDirectiveEnd; 4637 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4638 isHsaAbiVersion3(&getSTI()) 4639 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4640 HSAMD::V3::AssemblerDirectiveEnd) 4641 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4642 HSAMD::AssemblerDirectiveEnd); 4643 4644 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4645 return Error(getParser().getTok().getLoc(), 4646 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4647 "not available on non-amdhsa OSes")).str()); 4648 } 4649 4650 std::string HSAMetadataString; 4651 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4652 HSAMetadataString)) 4653 return true; 4654 4655 if (isHsaAbiVersion3(&getSTI())) { 4656 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4657 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4658 } else { 4659 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4660 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4661 } 4662 4663 return false; 4664 } 4665 4666 /// Common code to parse out a block of text (typically YAML) between start and 4667 /// end directives. 
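/// The collected text is returned in \p CollectString with the target's
/// statement separator appended after each line; the end directive itself is
/// consumed but not included in the result.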
4668 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4669 const char *AssemblerDirectiveEnd, 4670 std::string &CollectString) { 4671 4672 raw_string_ostream CollectStream(CollectString); 4673 4674 getLexer().setSkipSpace(false); 4675 4676 bool FoundEnd = false; 4677 while (!getLexer().is(AsmToken::Eof)) { 4678 while (getLexer().is(AsmToken::Space)) { 4679 CollectStream << getLexer().getTok().getString(); 4680 Lex(); 4681 } 4682 4683 if (getLexer().is(AsmToken::Identifier)) { 4684 StringRef ID = getLexer().getTok().getIdentifier(); 4685 if (ID == AssemblerDirectiveEnd) { 4686 Lex(); 4687 FoundEnd = true; 4688 break; 4689 } 4690 } 4691 4692 CollectStream << Parser.parseStringToEndOfStatement() 4693 << getContext().getAsmInfo()->getSeparatorString(); 4694 4695 Parser.eatToEndOfStatement(); 4696 } 4697 4698 getLexer().setSkipSpace(true); 4699 4700 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4701 return TokError(Twine("expected directive ") + 4702 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4703 } 4704 4705 CollectStream.flush(); 4706 return false; 4707 } 4708 4709 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4710 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4711 std::string String; 4712 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4713 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4714 return true; 4715 4716 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4717 if (!PALMetadata->setFromString(String)) 4718 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4719 return false; 4720 } 4721 4722 /// Parse the assembler directive for old linear-format PAL metadata. 4723 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4724 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4725 return Error(getParser().getTok().getLoc(), 4726 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4727 "not available on non-amdpal OSes")).str()); 4728 } 4729 4730 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4731 PALMetadata->setLegacy(); 4732 for (;;) { 4733 uint32_t Key, Value; 4734 if (ParseAsAbsoluteExpression(Key)) { 4735 return TokError(Twine("invalid value in ") + 4736 Twine(PALMD::AssemblerDirective)); 4737 } 4738 if (getLexer().isNot(AsmToken::Comma)) { 4739 return TokError(Twine("expected an even number of values in ") + 4740 Twine(PALMD::AssemblerDirective)); 4741 } 4742 Lex(); 4743 if (ParseAsAbsoluteExpression(Value)) { 4744 return TokError(Twine("invalid value in ") + 4745 Twine(PALMD::AssemblerDirective)); 4746 } 4747 PALMetadata->setRegister(Key, Value); 4748 if (getLexer().isNot(AsmToken::Comma)) 4749 break; 4750 Lex(); 4751 } 4752 return false; 4753 } 4754 4755 /// ParseDirectiveAMDGPULDS 4756 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4757 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4758 if (getParser().checkForValidSection()) 4759 return true; 4760 4761 StringRef Name; 4762 SMLoc NameLoc = getLexer().getLoc(); 4763 if (getParser().parseIdentifier(Name)) 4764 return TokError("expected identifier in directive"); 4765 4766 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4767 if (parseToken(AsmToken::Comma, "expected ','")) 4768 return true; 4769 4770 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4771 4772 int64_t Size; 4773 SMLoc SizeLoc = getLexer().getLoc(); 4774 if (getParser().parseAbsoluteExpression(Size)) 4775 return true; 4776 if (Size < 0) 4777 return 
Error(SizeLoc, "size must be non-negative"); 4778 if (Size > LocalMemorySize) 4779 return Error(SizeLoc, "size is too large");
4780 4781 int64_t Alignment = 4; 4782 if (getLexer().is(AsmToken::Comma)) { 4783 Lex(); 4784 SMLoc AlignLoc = getLexer().getLoc(); 4785 if (getParser().parseAbsoluteExpression(Alignment)) 4786 return true; 4787 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4788 return Error(AlignLoc, "alignment must be a power of two");
4789 4790 // Alignment larger than the size of LDS is possible in theory, as long 4791 // as the linker manages to place the symbol at address 0, but we do want 4792 // to make sure the alignment fits nicely into a 32-bit integer. 4793 if (Alignment >= 1u << 31) 4794 return Error(AlignLoc, "alignment is too large"); 4795 }
4796 4797 if (parseToken(AsmToken::EndOfStatement, 4798 "unexpected token in '.amdgpu_lds' directive")) 4799 return true;
4800 4801 Symbol->redefineIfPossible(); 4802 if (!Symbol->isUndefined()) 4803 return Error(NameLoc, "invalid symbol redefinition");
4804 4805 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4806 return false; 4807 }
4808 4809 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4810 StringRef IDVal = DirectiveID.getString();
4811 4812 if (isHsaAbiVersion3(&getSTI())) { 4813 if (IDVal == ".amdgcn_target") 4814 return ParseDirectiveAMDGCNTarget();
4815 4816 if (IDVal == ".amdhsa_kernel") 4817 return ParseDirectiveAMDHSAKernel();
4818 4819 // TODO: Restructure/combine with PAL metadata directive. 4820 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4821 return ParseDirectiveHSAMetadata();
4822 } else { 4823 if (IDVal == ".hsa_code_object_version") 4824 return ParseDirectiveHSACodeObjectVersion();
4825 4826 if (IDVal == ".hsa_code_object_isa") 4827 return ParseDirectiveHSACodeObjectISA();
4828 4829 if (IDVal == ".amd_kernel_code_t") 4830 return ParseDirectiveAMDKernelCodeT();
4831 4832 if (IDVal == ".amdgpu_hsa_kernel") 4833 return ParseDirectiveAMDGPUHsaKernel();
4834 4835 if (IDVal == ".amd_amdgpu_isa") 4836 return ParseDirectiveISAVersion();
4837 4838 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4839 return ParseDirectiveHSAMetadata(); 4840 }
4841 4842 if (IDVal == ".amdgpu_lds") 4843 return ParseDirectiveAMDGPULDS();
4844 4845 if (IDVal == PALMD::AssemblerDirectiveBegin) 4846 return ParseDirectivePALMetadataBegin();
4847 4848 if (IDVal == PALMD::AssemblerDirective) 4849 return ParseDirectivePALMetadata();
4850 4851 return true; 4852 }
4853 4854 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4855 unsigned RegNo) const { 4856 4857 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4858 R.isValid(); ++R) { 4859 if (*R == RegNo) 4860 return isGFX9Plus(); 4861 }
4862 4863 // GFX10 has 2 more SGPRs 104 and 105.
4864 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4865 R.isValid(); ++R) { 4866 if (*R == RegNo) 4867 return hasSGPR104_SGPR105(); 4868 }
4869 4870 switch (RegNo) { 4871 case AMDGPU::SRC_SHARED_BASE: 4872 case AMDGPU::SRC_SHARED_LIMIT: 4873 case AMDGPU::SRC_PRIVATE_BASE: 4874 case AMDGPU::SRC_PRIVATE_LIMIT: 4875 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4876 return isGFX9Plus();
4877 case AMDGPU::TBA: 4878 case AMDGPU::TBA_LO: 4879 case AMDGPU::TBA_HI: 4880 case AMDGPU::TMA: 4881 case AMDGPU::TMA_LO: 4882 case AMDGPU::TMA_HI: 4883 return !isGFX9Plus();
4884 case AMDGPU::XNACK_MASK: 4885 case AMDGPU::XNACK_MASK_LO: 4886 case AMDGPU::XNACK_MASK_HI: 4887 return (isVI() || isGFX9()) && hasXNACK();
4888 case AMDGPU::SGPR_NULL: 4889 return isGFX10Plus(); 4890 default: 4891 break; 4892 }
4893 4894 if (isCI()) 4895 return true;
4896 4897 if (isSI() || isGFX10Plus()) { 4898 // No flat_scr on SI. 4899 // On GFX10 flat scratch is not a valid register operand and can only be 4900 // accessed with s_setreg/s_getreg. 4901 switch (RegNo) { 4902 case AMDGPU::FLAT_SCR: 4903 case AMDGPU::FLAT_SCR_LO: 4904 case AMDGPU::FLAT_SCR_HI: 4905 return false; 4906 default: 4907 return true; 4908 } 4909 }
4910 4911 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4912 // SI/CI have. 4913 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4914 R.isValid(); ++R) { 4915 if (*R == RegNo) 4916 return hasSGPR102_SGPR103(); 4917 }
4918 4919 return true; 4920 }
4921 4922 OperandMatchResultTy 4923 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4924 OperandMode Mode) { 4925 // Try to parse with a custom parser 4926 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4927 4928 // If we successfully parsed the operand or if there was an error parsing, 4929 // we are done. 4930 // 4931 // If we are parsing after we reach EndOfStatement then this means we 4932 // are appending default values to the Operands list. This is only done 4933 // by a custom parser, so we shouldn't continue on to the generic parsing. 4934 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4935 getLexer().is(AsmToken::EndOfStatement)) 4936 return ResTy;
4937 4938 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4939 unsigned Prefix = Operands.size(); 4940 SMLoc LBraceLoc = getTok().getLoc(); 4941 Parser.Lex(); // eat the '['
4942 4943 for (;;) { 4944 ResTy = parseReg(Operands); 4945 if (ResTy != MatchOperand_Success) 4946 return ResTy;
4947 4948 if (getLexer().is(AsmToken::RBrac)) 4949 break;
4950 4951 if (getLexer().isNot(AsmToken::Comma)) 4952 return MatchOperand_ParseFail; 4953 Parser.Lex(); 4954 }
4955 4956 if (Operands.size() - Prefix > 1) { 4957 Operands.insert(Operands.begin() + Prefix, 4958 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4959 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4960 getTok().getLoc())); 4961 }
4962 4963 Parser.Lex(); // eat the ']' 4964 return MatchOperand_Success; 4965 }
4966 4967 return parseRegOrImm(Operands); 4968 }
4969 4970 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4971 // Clear any forced encodings from the previous instruction.
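// e.g. "v_add_f32_e64" forces the 64-bit (VOP3) encoding of v_add_f32 and
// "v_mov_b32_sdwa" forces the SDWA encoding; the suffix is stripped from the
// mnemonic that is returned.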
4972 setForcedEncodingSize(0); 4973 setForcedDPP(false); 4974 setForcedSDWA(false); 4975 4976 if (Name.endswith("_e64")) { 4977 setForcedEncodingSize(64); 4978 return Name.substr(0, Name.size() - 4); 4979 } else if (Name.endswith("_e32")) { 4980 setForcedEncodingSize(32); 4981 return Name.substr(0, Name.size() - 4); 4982 } else if (Name.endswith("_dpp")) { 4983 setForcedDPP(true); 4984 return Name.substr(0, Name.size() - 4); 4985 } else if (Name.endswith("_sdwa")) { 4986 setForcedSDWA(true); 4987 return Name.substr(0, Name.size() - 5); 4988 } 4989 return Name; 4990 } 4991 4992 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4993 StringRef Name, 4994 SMLoc NameLoc, OperandVector &Operands) { 4995 // Add the instruction mnemonic 4996 Name = parseMnemonicSuffix(Name); 4997 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4998 4999 bool IsMIMG = Name.startswith("image_"); 5000 5001 while (!getLexer().is(AsmToken::EndOfStatement)) { 5002 OperandMode Mode = OperandMode_Default; 5003 if (IsMIMG && isGFX10Plus() && Operands.size() == 2) 5004 Mode = OperandMode_NSA; 5005 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 5006 5007 // Eat the comma or space if there is one. 5008 if (getLexer().is(AsmToken::Comma)) 5009 Parser.Lex(); 5010 5011 if (Res != MatchOperand_Success) { 5012 checkUnsupportedInstruction(Name, NameLoc); 5013 if (!Parser.hasPendingError()) { 5014 // FIXME: use real operand location rather than the current location. 5015 StringRef Msg = 5016 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 5017 "not a valid operand."; 5018 Error(getLexer().getLoc(), Msg); 5019 } 5020 while (!getLexer().is(AsmToken::EndOfStatement)) { 5021 Parser.Lex(); 5022 } 5023 return true; 5024 } 5025 } 5026 5027 return false; 5028 } 5029 5030 //===----------------------------------------------------------------------===// 5031 // Utility functions 5032 //===----------------------------------------------------------------------===// 5033 5034 OperandMatchResultTy 5035 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 5036 5037 if (!trySkipId(Prefix, AsmToken::Colon)) 5038 return MatchOperand_NoMatch; 5039 5040 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 5041 } 5042 5043 OperandMatchResultTy 5044 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 5045 AMDGPUOperand::ImmTy ImmTy, 5046 bool (*ConvertResult)(int64_t&)) { 5047 SMLoc S = getLoc(); 5048 int64_t Value = 0; 5049 5050 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 5051 if (Res != MatchOperand_Success) 5052 return Res; 5053 5054 if (ConvertResult && !ConvertResult(Value)) { 5055 Error(S, "invalid " + StringRef(Prefix) + " value."); 5056 } 5057 5058 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 5059 return MatchOperand_Success; 5060 } 5061 5062 OperandMatchResultTy 5063 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 5064 OperandVector &Operands, 5065 AMDGPUOperand::ImmTy ImmTy, 5066 bool (*ConvertResult)(int64_t&)) { 5067 SMLoc S = getLoc(); 5068 if (!trySkipId(Prefix, AsmToken::Colon)) 5069 return MatchOperand_NoMatch; 5070 5071 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 5072 return MatchOperand_ParseFail; 5073 5074 unsigned Val = 0; 5075 const unsigned MaxSize = 4; 5076 5077 // FIXME: How to verify the number of elements matches the number of src 5078 // operands? 
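// Each element must be 0 or 1; up to MaxSize values are packed LSB-first into
// a single immediate, e.g. op_sel:[1,0,1] becomes 0b101.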
5079 for (int I = 0; ; ++I) { 5080 int64_t Op; 5081 SMLoc Loc = getLoc(); 5082 if (!parseExpr(Op)) 5083 return MatchOperand_ParseFail; 5084 5085 if (Op != 0 && Op != 1) { 5086 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 5087 return MatchOperand_ParseFail; 5088 } 5089 5090 Val |= (Op << I); 5091 5092 if (trySkipToken(AsmToken::RBrac)) 5093 break; 5094 5095 if (I + 1 == MaxSize) { 5096 Error(getLoc(), "expected a closing square bracket"); 5097 return MatchOperand_ParseFail; 5098 } 5099 5100 if (!skipToken(AsmToken::Comma, "expected a comma")) 5101 return MatchOperand_ParseFail; 5102 } 5103 5104 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 5105 return MatchOperand_Success; 5106 } 5107 5108 OperandMatchResultTy 5109 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 5110 AMDGPUOperand::ImmTy ImmTy) { 5111 int64_t Bit = 0; 5112 SMLoc S = Parser.getTok().getLoc(); 5113 5114 // We are at the end of the statement, and this is a default argument, so 5115 // use a default value. 5116 if (getLexer().isNot(AsmToken::EndOfStatement)) { 5117 switch(getLexer().getKind()) { 5118 case AsmToken::Identifier: { 5119 StringRef Tok = Parser.getTok().getString(); 5120 if (Tok == Name) { 5121 if (Tok == "r128" && !hasMIMG_R128()) 5122 Error(S, "r128 modifier is not supported on this GPU"); 5123 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 5124 Error(S, "a16 modifier is not supported on this GPU"); 5125 Bit = 1; 5126 Parser.Lex(); 5127 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 5128 Bit = 0; 5129 Parser.Lex(); 5130 } else { 5131 return MatchOperand_NoMatch; 5132 } 5133 break; 5134 } 5135 default: 5136 return MatchOperand_NoMatch; 5137 } 5138 } 5139 5140 if (!isGFX10Plus() && ImmTy == AMDGPUOperand::ImmTyDLC) 5141 return MatchOperand_ParseFail; 5142 5143 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 5144 ImmTy = AMDGPUOperand::ImmTyR128A16; 5145 5146 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 5147 return MatchOperand_Success; 5148 } 5149 5150 static void addOptionalImmOperand( 5151 MCInst& Inst, const OperandVector& Operands, 5152 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 5153 AMDGPUOperand::ImmTy ImmT, 5154 int64_t Default = 0) { 5155 auto i = OptionalIdx.find(ImmT); 5156 if (i != OptionalIdx.end()) { 5157 unsigned Idx = i->second; 5158 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 5159 } else { 5160 Inst.addOperand(MCOperand::createImm(Default)); 5161 } 5162 } 5163 5164 OperandMatchResultTy 5165 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 5166 if (getLexer().isNot(AsmToken::Identifier)) { 5167 return MatchOperand_NoMatch; 5168 } 5169 StringRef Tok = Parser.getTok().getString(); 5170 if (Tok != Prefix) { 5171 return MatchOperand_NoMatch; 5172 } 5173 5174 Parser.Lex(); 5175 if (getLexer().isNot(AsmToken::Colon)) { 5176 return MatchOperand_ParseFail; 5177 } 5178 5179 Parser.Lex(); 5180 if (getLexer().isNot(AsmToken::Identifier)) { 5181 return MatchOperand_ParseFail; 5182 } 5183 5184 Value = Parser.getTok().getString(); 5185 return MatchOperand_Success; 5186 } 5187 5188 //===----------------------------------------------------------------------===// 5189 // MTBUF format 5190 //===----------------------------------------------------------------------===// 5191 5192 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 5193 int64_t MaxVal, 5194 int64_t &Fmt) { 5195 int64_t Val; 5196 SMLoc Loc = getLoc(); 5197 5198 auto Res = parseIntWithPrefix(Pref, Val); 5199 if 
(Res == MatchOperand_ParseFail) 5200 return false; 5201 if (Res == MatchOperand_NoMatch) 5202 return true; 5203 5204 if (Val < 0 || Val > MaxVal) { 5205 Error(Loc, Twine("out of range ", StringRef(Pref))); 5206 return false; 5207 } 5208 5209 Fmt = Val; 5210 return true; 5211 } 5212 5213 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 5214 // values to live in a joint format operand in the MCInst encoding. 5215 OperandMatchResultTy 5216 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 5217 using namespace llvm::AMDGPU::MTBUFFormat; 5218 5219 int64_t Dfmt = DFMT_UNDEF; 5220 int64_t Nfmt = NFMT_UNDEF; 5221 5222 // dfmt and nfmt can appear in either order, and each is optional. 5223 for (int I = 0; I < 2; ++I) { 5224 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 5225 return MatchOperand_ParseFail; 5226 5227 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 5228 return MatchOperand_ParseFail; 5229 } 5230 // Skip optional comma between dfmt/nfmt 5231 // but guard against 2 commas following each other. 5232 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 5233 !peekToken().is(AsmToken::Comma)) { 5234 trySkipToken(AsmToken::Comma); 5235 } 5236 } 5237 5238 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5239 return MatchOperand_NoMatch; 5240 5241 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5242 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5243 5244 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5245 return MatchOperand_Success; 5246 } 5247 5248 OperandMatchResultTy 5249 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5250 using namespace llvm::AMDGPU::MTBUFFormat; 5251 5252 int64_t Fmt = UFMT_UNDEF; 5253 5254 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5255 return MatchOperand_ParseFail; 5256 5257 if (Fmt == UFMT_UNDEF) 5258 return MatchOperand_NoMatch; 5259 5260 Format = Fmt; 5261 return MatchOperand_Success; 5262 } 5263 5264 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5265 int64_t &Nfmt, 5266 StringRef FormatStr, 5267 SMLoc Loc) { 5268 using namespace llvm::AMDGPU::MTBUFFormat; 5269 int64_t Format; 5270 5271 Format = getDfmt(FormatStr); 5272 if (Format != DFMT_UNDEF) { 5273 Dfmt = Format; 5274 return true; 5275 } 5276 5277 Format = getNfmt(FormatStr, getSTI()); 5278 if (Format != NFMT_UNDEF) { 5279 Nfmt = Format; 5280 return true; 5281 } 5282 5283 Error(Loc, "unsupported format"); 5284 return false; 5285 } 5286 5287 OperandMatchResultTy 5288 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5289 SMLoc FormatLoc, 5290 int64_t &Format) { 5291 using namespace llvm::AMDGPU::MTBUFFormat; 5292 5293 int64_t Dfmt = DFMT_UNDEF; 5294 int64_t Nfmt = NFMT_UNDEF; 5295 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5296 return MatchOperand_ParseFail; 5297 5298 if (trySkipToken(AsmToken::Comma)) { 5299 StringRef Str; 5300 SMLoc Loc = getLoc(); 5301 if (!parseId(Str, "expected a format string") || 5302 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5303 return MatchOperand_ParseFail; 5304 } 5305 if (Dfmt == DFMT_UNDEF) { 5306 Error(Loc, "duplicate numeric format"); 5307 return MatchOperand_ParseFail; 5308 } else if (Nfmt == NFMT_UNDEF) { 5309 Error(Loc, "duplicate data format"); 5310 return MatchOperand_ParseFail; 5311 } 5312 } 5313 5314 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5315 Nfmt = (Nfmt == NFMT_UNDEF) ? 
NFMT_DEFAULT : Nfmt; 5316 5317 if (isGFX10Plus()) { 5318 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5319 if (Ufmt == UFMT_UNDEF) { 5320 Error(FormatLoc, "unsupported format"); 5321 return MatchOperand_ParseFail; 5322 } 5323 Format = Ufmt; 5324 } else { 5325 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5326 } 5327 5328 return MatchOperand_Success; 5329 } 5330 5331 OperandMatchResultTy 5332 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5333 SMLoc Loc, 5334 int64_t &Format) { 5335 using namespace llvm::AMDGPU::MTBUFFormat; 5336 5337 auto Id = getUnifiedFormat(FormatStr); 5338 if (Id == UFMT_UNDEF) 5339 return MatchOperand_NoMatch; 5340 5341 if (!isGFX10Plus()) { 5342 Error(Loc, "unified format is not supported on this GPU"); 5343 return MatchOperand_ParseFail; 5344 } 5345 5346 Format = Id; 5347 return MatchOperand_Success; 5348 } 5349 5350 OperandMatchResultTy 5351 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5352 using namespace llvm::AMDGPU::MTBUFFormat; 5353 SMLoc Loc = getLoc(); 5354 5355 if (!parseExpr(Format)) 5356 return MatchOperand_ParseFail; 5357 if (!isValidFormatEncoding(Format, getSTI())) { 5358 Error(Loc, "out of range format"); 5359 return MatchOperand_ParseFail; 5360 } 5361 5362 return MatchOperand_Success; 5363 } 5364 5365 OperandMatchResultTy 5366 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5367 using namespace llvm::AMDGPU::MTBUFFormat; 5368 5369 if (!trySkipId("format", AsmToken::Colon)) 5370 return MatchOperand_NoMatch; 5371 5372 if (trySkipToken(AsmToken::LBrac)) { 5373 StringRef FormatStr; 5374 SMLoc Loc = getLoc(); 5375 if (!parseId(FormatStr, "expected a format string")) 5376 return MatchOperand_ParseFail; 5377 5378 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5379 if (Res == MatchOperand_NoMatch) 5380 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5381 if (Res != MatchOperand_Success) 5382 return Res; 5383 5384 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5385 return MatchOperand_ParseFail; 5386 5387 return MatchOperand_Success; 5388 } 5389 5390 return parseNumericFormat(Format); 5391 } 5392 5393 OperandMatchResultTy 5394 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5395 using namespace llvm::AMDGPU::MTBUFFormat; 5396 5397 int64_t Format = getDefaultFormatEncoding(getSTI()); 5398 OperandMatchResultTy Res; 5399 SMLoc Loc = getLoc(); 5400 5401 // Parse legacy format syntax. 5402 Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5403 if (Res == MatchOperand_ParseFail) 5404 return Res; 5405 5406 bool FormatFound = (Res == MatchOperand_Success); 5407 5408 Operands.push_back( 5409 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5410 5411 if (FormatFound) 5412 trySkipToken(AsmToken::Comma); 5413 5414 if (isToken(AsmToken::EndOfStatement)) { 5415 // We are expecting an soffset operand, 5416 // but let matcher handle the error. 5417 return MatchOperand_Success; 5418 } 5419 5420 // Parse soffset. 
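// (Note: the soffset operand may be a register or an immediate; parseRegOrImm
// below accepts both. If a legacy dfmt/nfmt or unified format was already
// consumed above, a trailing "format:" clause after soffset is reported as a
// duplicate; otherwise a format may still legitimately follow soffset and is
// patched into the placeholder format operand created above.)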
5421 Res = parseRegOrImm(Operands); 5422 if (Res != MatchOperand_Success) 5423 return Res; 5424 5425 trySkipToken(AsmToken::Comma); 5426 5427 if (!FormatFound) { 5428 Res = parseSymbolicOrNumericFormat(Format); 5429 if (Res == MatchOperand_ParseFail) 5430 return Res; 5431 if (Res == MatchOperand_Success) { 5432 auto Size = Operands.size(); 5433 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5434 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5435 Op.setImm(Format); 5436 } 5437 return MatchOperand_Success; 5438 } 5439 5440 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5441 Error(getLoc(), "duplicate format"); 5442 return MatchOperand_ParseFail; 5443 } 5444 return MatchOperand_Success; 5445 } 5446 5447 //===----------------------------------------------------------------------===// 5448 // ds 5449 //===----------------------------------------------------------------------===// 5450 5451 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5452 const OperandVector &Operands) { 5453 OptionalImmIndexMap OptionalIdx; 5454 5455 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5456 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5457 5458 // Add the register arguments 5459 if (Op.isReg()) { 5460 Op.addRegOperands(Inst, 1); 5461 continue; 5462 } 5463 5464 // Handle optional arguments 5465 OptionalIdx[Op.getImmTy()] = i; 5466 } 5467 5468 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5469 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5470 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5471 5472 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5473 } 5474 5475 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5476 bool IsGdsHardcoded) { 5477 OptionalImmIndexMap OptionalIdx; 5478 5479 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5480 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5481 5482 // Add the register arguments 5483 if (Op.isReg()) { 5484 Op.addRegOperands(Inst, 1); 5485 continue; 5486 } 5487 5488 if (Op.isToken() && Op.getToken() == "gds") { 5489 IsGdsHardcoded = true; 5490 continue; 5491 } 5492 5493 // Handle optional arguments 5494 OptionalIdx[Op.getImmTy()] = i; 5495 } 5496 5497 AMDGPUOperand::ImmTy OffsetType = 5498 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5499 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5500 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 5501 AMDGPUOperand::ImmTyOffset; 5502 5503 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5504 5505 if (!IsGdsHardcoded) { 5506 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5507 } 5508 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5509 } 5510 5511 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5512 OptionalImmIndexMap OptionalIdx; 5513 5514 unsigned OperandIdx[4]; 5515 unsigned EnMask = 0; 5516 int SrcIdx = 0; 5517 5518 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5519 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5520 5521 // Add the register arguments 5522 if (Op.isReg()) { 5523 assert(SrcIdx < 4); 5524 OperandIdx[SrcIdx] = Inst.size(); 5525 Op.addRegOperands(Inst, 1); 5526 ++SrcIdx; 5527 continue; 5528 } 5529 5530 if (Op.isOff()) { 5531 assert(SrcIdx < 4); 5532 OperandIdx[SrcIdx] = Inst.size(); 5533 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5534 ++SrcIdx; 5535 continue; 5536 } 5537 5538 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5539 Op.addImmOperands(Inst, 1); 5540 continue; 5541 } 5542 5543 if (Op.isToken() && Op.getToken() == "done") 5544 continue; 5545 5546 // Handle optional arguments 5547 OptionalIdx[Op.getImmTy()] = i; 5548 } 5549 5550 assert(SrcIdx == 4); 5551 5552 bool Compr = false; 5553 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5554 Compr = true; 5555 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5556 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5557 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5558 } 5559 5560 for (auto i = 0; i < SrcIdx; ++i) { 5561 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5562 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 5563 } 5564 } 5565 5566 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5567 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5568 5569 Inst.addOperand(MCOperand::createImm(EnMask)); 5570 } 5571 5572 //===----------------------------------------------------------------------===// 5573 // s_waitcnt 5574 //===----------------------------------------------------------------------===// 5575 5576 static bool 5577 encodeCnt( 5578 const AMDGPU::IsaVersion ISA, 5579 int64_t &IntVal, 5580 int64_t CntVal, 5581 bool Saturate, 5582 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5583 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5584 { 5585 bool Failed = false; 5586 5587 IntVal = encode(ISA, IntVal, CntVal); 5588 if (CntVal != decode(ISA, IntVal)) { 5589 if (Saturate) { 5590 IntVal = encode(ISA, IntVal, -1); 5591 } else { 5592 Failed = true; 5593 } 5594 } 5595 return Failed; 5596 } 5597 5598 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5599 5600 SMLoc CntLoc = getLoc(); 5601 StringRef CntName = getTokenStr(); 5602 5603 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5604 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5605 return false; 5606 5607 int64_t CntVal; 5608 SMLoc ValLoc = getLoc(); 5609 if (!parseExpr(CntVal)) 5610 return false; 5611 5612 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5613 5614 bool Failed = true; 5615 bool Sat = CntName.endswith("_sat"); 5616 5617 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5618 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5619 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5620 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5621 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5622 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5623 } else { 5624 Error(CntLoc, "invalid counter name " + CntName); 5625 return false; 5626 } 5627 5628 if (Failed) { 5629 Error(ValLoc, "too large value for " + CntName); 5630 return false; 5631 } 5632 5633 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5634 return false; 5635 5636 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5637 if (isToken(AsmToken::EndOfStatement)) { 5638 Error(getLoc(), "expected a counter name"); 5639 return false; 5640 } 5641 } 5642 5643 return true; 5644 } 5645 5646 OperandMatchResultTy 5647 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5648 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5649 int64_t Waitcnt = getWaitcntBitMask(ISA); 5650 SMLoc S = getLoc(); 5651 5652 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5653 while (!isToken(AsmToken::EndOfStatement)) { 5654 if (!parseCnt(Waitcnt)) 5655 return MatchOperand_ParseFail; 5656 } 5657 } else { 5658 if (!parseExpr(Waitcnt)) 5659 return MatchOperand_ParseFail; 5660 } 5661 5662 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5663 return MatchOperand_Success; 5664 } 5665 5666 bool 5667 AMDGPUOperand::isSWaitCnt() const { 5668 return isImm(); 5669 } 5670 5671 //===----------------------------------------------------------------------===// 5672 // hwreg 5673 //===----------------------------------------------------------------------===// 5674 5675 bool 5676 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5677 OperandInfoTy &Offset, 5678 
OperandInfoTy &Width) { 5679 using namespace llvm::AMDGPU::Hwreg; 5680 5681 // The register may be specified by name or using a numeric code 5682 HwReg.Loc = getLoc(); 5683 if (isToken(AsmToken::Identifier) && 5684 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5685 HwReg.IsSymbolic = true; 5686 lex(); // skip register name 5687 } else if (!parseExpr(HwReg.Id, "a register name")) { 5688 return false; 5689 } 5690 5691 if (trySkipToken(AsmToken::RParen)) 5692 return true; 5693 5694 // parse optional params 5695 if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis")) 5696 return false; 5697 5698 Offset.Loc = getLoc(); 5699 if (!parseExpr(Offset.Id)) 5700 return false; 5701 5702 if (!skipToken(AsmToken::Comma, "expected a comma")) 5703 return false; 5704 5705 Width.Loc = getLoc(); 5706 return parseExpr(Width.Id) && 5707 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5708 } 5709 5710 bool 5711 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5712 const OperandInfoTy &Offset, 5713 const OperandInfoTy &Width) { 5714 5715 using namespace llvm::AMDGPU::Hwreg; 5716 5717 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5718 Error(HwReg.Loc, 5719 "specified hardware register is not supported on this GPU"); 5720 return false; 5721 } 5722 if (!isValidHwreg(HwReg.Id)) { 5723 Error(HwReg.Loc, 5724 "invalid code of hardware register: only 6-bit values are legal"); 5725 return false; 5726 } 5727 if (!isValidHwregOffset(Offset.Id)) { 5728 Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal"); 5729 return false; 5730 } 5731 if (!isValidHwregWidth(Width.Id)) { 5732 Error(Width.Loc, 5733 "invalid bitfield width: only values from 1 to 32 are legal"); 5734 return false; 5735 } 5736 return true; 5737 } 5738 5739 OperandMatchResultTy 5740 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5741 using namespace llvm::AMDGPU::Hwreg; 5742 5743 int64_t ImmVal = 0; 5744 SMLoc Loc = getLoc(); 5745 5746 if (trySkipId("hwreg", AsmToken::LParen)) { 5747 OperandInfoTy HwReg(ID_UNKNOWN_); 5748 OperandInfoTy Offset(OFFSET_DEFAULT_); 5749 OperandInfoTy Width(WIDTH_DEFAULT_); 5750 if (parseHwregBody(HwReg, Offset, Width) && 5751 validateHwreg(HwReg, Offset, Width)) { 5752 ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id); 5753 } else { 5754 return MatchOperand_ParseFail; 5755 } 5756 } else if (parseExpr(ImmVal, "a hwreg macro")) { 5757 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5758 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5759 return MatchOperand_ParseFail; 5760 } 5761 } else { 5762 return MatchOperand_ParseFail; 5763 } 5764 5765 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5766 return MatchOperand_Success; 5767 } 5768 5769 bool AMDGPUOperand::isHwreg() const { 5770 return isImmTy(ImmTyHwreg); 5771 } 5772 5773 //===----------------------------------------------------------------------===// 5774 // sendmsg 5775 //===----------------------------------------------------------------------===// 5776 5777 bool 5778 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5779 OperandInfoTy &Op, 5780 OperandInfoTy &Stream) { 5781 using namespace llvm::AMDGPU::SendMsg; 5782 5783 Msg.Loc = getLoc(); 5784 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5785 Msg.IsSymbolic = true; 5786 lex(); // skip message name 5787 } else if (!parseExpr(Msg.Id, "a message name")) { 5788 return false; 5789 } 5790 5791 if (trySkipToken(AsmToken::Comma)) { 5792 Op.IsDefined = true; 
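// Illustrative example of the surface syntax handled here (assumed, based on
// the symbolic names defined in AMDGPUAsmUtils):
//   s_sendmsg sendmsg(MSG_GS, GS_OP_EMIT, 0)
// The operation may be given by name (resolved via getMsgOpId) or as an
// absolute expression; an optional trailing stream id is parsed below.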
5793 Op.Loc = getLoc(); 5794 if (isToken(AsmToken::Identifier) && 5795 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5796 lex(); // skip operation name 5797 } else if (!parseExpr(Op.Id, "an operation name")) { 5798 return false; 5799 } 5800 5801 if (trySkipToken(AsmToken::Comma)) { 5802 Stream.IsDefined = true; 5803 Stream.Loc = getLoc(); 5804 if (!parseExpr(Stream.Id)) 5805 return false; 5806 } 5807 } 5808 5809 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5810 } 5811 5812 bool 5813 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5814 const OperandInfoTy &Op, 5815 const OperandInfoTy &Stream) { 5816 using namespace llvm::AMDGPU::SendMsg; 5817 5818 // Validation strictness depends on whether the message is specified 5819 // in a symbolic or in a numeric form. In the latter case 5820 // only the possibility of encoding is checked. 5821 bool Strict = Msg.IsSymbolic; 5822 5823 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5824 Error(Msg.Loc, "invalid message id"); 5825 return false; 5826 } 5827 if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5828 if (Op.IsDefined) { 5829 Error(Op.Loc, "message does not support operations"); 5830 } else { 5831 Error(Msg.Loc, "missing message operation"); 5832 } 5833 return false; 5834 } 5835 if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5836 Error(Op.Loc, "invalid operation id"); 5837 return false; 5838 } 5839 if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5840 Error(Stream.Loc, "message operation does not support streams"); 5841 return false; 5842 } 5843 if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5844 Error(Stream.Loc, "invalid message stream id"); 5845 return false; 5846 } 5847 return true; 5848 } 5849 5850 OperandMatchResultTy 5851 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5852 using namespace llvm::AMDGPU::SendMsg; 5853 5854 int64_t ImmVal = 0; 5855 SMLoc Loc = getLoc(); 5856 5857 if (trySkipId("sendmsg", AsmToken::LParen)) { 5858 OperandInfoTy Msg(ID_UNKNOWN_); 5859 OperandInfoTy Op(OP_NONE_); 5860 OperandInfoTy Stream(STREAM_ID_NONE_); 5861 if (parseSendMsgBody(Msg, Op, Stream) && 5862 validateSendMsg(Msg, Op, Stream)) { 5863 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5864 } else { 5865 return MatchOperand_ParseFail; 5866 } 5867 } else if (parseExpr(ImmVal, "a sendmsg macro")) { 5868 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5869 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5870 return MatchOperand_ParseFail; 5871 } 5872 } else { 5873 return MatchOperand_ParseFail; 5874 } 5875 5876 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5877 return MatchOperand_Success; 5878 } 5879 5880 bool AMDGPUOperand::isSendMsg() const { 5881 return isImmTy(ImmTySendMsg); 5882 } 5883 5884 //===----------------------------------------------------------------------===// 5885 // v_interp 5886 //===----------------------------------------------------------------------===// 5887 5888 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5889 if (getLexer().getKind() != AsmToken::Identifier) 5890 return MatchOperand_NoMatch; 5891 5892 StringRef Str = Parser.getTok().getString(); 5893 int Slot = StringSwitch<int>(Str) 5894 .Case("p10", 0) 5895 .Case("p20", 1) 5896 .Case("p0", 2) 5897 .Default(-1); 5898 5899 SMLoc S = Parser.getTok().getLoc(); 5900 if (Slot == -1) 5901 return MatchOperand_ParseFail; 5902 5903 Parser.Lex(); 5904 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5905
AMDGPUOperand::ImmTyInterpSlot)); 5906 return MatchOperand_Success; 5907 } 5908 5909 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5910 if (getLexer().getKind() != AsmToken::Identifier) 5911 return MatchOperand_NoMatch; 5912 5913 StringRef Str = Parser.getTok().getString(); 5914 if (!Str.startswith("attr")) 5915 return MatchOperand_NoMatch; 5916 5917 StringRef Chan = Str.take_back(2); 5918 int AttrChan = StringSwitch<int>(Chan) 5919 .Case(".x", 0) 5920 .Case(".y", 1) 5921 .Case(".z", 2) 5922 .Case(".w", 3) 5923 .Default(-1); 5924 if (AttrChan == -1) 5925 return MatchOperand_ParseFail; 5926 5927 Str = Str.drop_back(2).drop_front(4); 5928 5929 uint8_t Attr; 5930 if (Str.getAsInteger(10, Attr)) 5931 return MatchOperand_ParseFail; 5932 5933 SMLoc S = Parser.getTok().getLoc(); 5934 Parser.Lex(); 5935 if (Attr > 63) { 5936 Error(S, "out of bounds attr"); 5937 return MatchOperand_ParseFail; 5938 } 5939 5940 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5941 5942 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5943 AMDGPUOperand::ImmTyInterpAttr)); 5944 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5945 AMDGPUOperand::ImmTyAttrChan)); 5946 return MatchOperand_Success; 5947 } 5948 5949 //===----------------------------------------------------------------------===// 5950 // exp 5951 //===----------------------------------------------------------------------===// 5952 5953 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5954 uint8_t &Val) { 5955 if (Str == "null") { 5956 Val = Exp::ET_NULL; 5957 return MatchOperand_Success; 5958 } 5959 5960 if (Str.startswith("mrt")) { 5961 Str = Str.drop_front(3); 5962 if (Str == "z") { // == mrtz 5963 Val = Exp::ET_MRTZ; 5964 return MatchOperand_Success; 5965 } 5966 5967 if (Str.getAsInteger(10, Val)) 5968 return MatchOperand_ParseFail; 5969 5970 if (Val > Exp::ET_MRT7) 5971 return MatchOperand_ParseFail; 5972 5973 return MatchOperand_Success; 5974 } 5975 5976 if (Str.startswith("pos")) { 5977 Str = Str.drop_front(3); 5978 if (Str.getAsInteger(10, Val)) 5979 return MatchOperand_ParseFail; 5980 5981 if (Val > (isGFX10Plus() ? 
4 : 3)) 5982 return MatchOperand_ParseFail; 5983 5984 Val += Exp::ET_POS0; 5985 return MatchOperand_Success; 5986 } 5987 5988 if (isGFX10Plus() && Str == "prim") { 5989 Val = Exp::ET_PRIM; 5990 return MatchOperand_Success; 5991 } 5992 5993 if (Str.startswith("param")) { 5994 Str = Str.drop_front(5); 5995 if (Str.getAsInteger(10, Val)) 5996 return MatchOperand_ParseFail; 5997 5998 if (Val >= 32) 5999 return MatchOperand_ParseFail; 6000 6001 Val += Exp::ET_PARAM0; 6002 return MatchOperand_Success; 6003 } 6004 6005 return MatchOperand_ParseFail; 6006 } 6007 6008 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 6009 if (!isToken(AsmToken::Identifier)) 6010 return MatchOperand_NoMatch; 6011 6012 SMLoc S = getLoc(); 6013 6014 uint8_t Val; 6015 auto Res = parseExpTgtImpl(getTokenStr(), Val); 6016 if (Res != MatchOperand_Success) { 6017 Error(S, "invalid exp target"); 6018 return Res; 6019 } 6020 6021 Parser.Lex(); 6022 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 6023 AMDGPUOperand::ImmTyExpTgt)); 6024 return MatchOperand_Success; 6025 } 6026 6027 //===----------------------------------------------------------------------===// 6028 // parser helpers 6029 //===----------------------------------------------------------------------===// 6030 6031 bool 6032 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 6033 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 6034 } 6035 6036 bool 6037 AMDGPUAsmParser::isId(const StringRef Id) const { 6038 return isId(getToken(), Id); 6039 } 6040 6041 bool 6042 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 6043 return getTokenKind() == Kind; 6044 } 6045 6046 bool 6047 AMDGPUAsmParser::trySkipId(const StringRef Id) { 6048 if (isId(Id)) { 6049 lex(); 6050 return true; 6051 } 6052 return false; 6053 } 6054 6055 bool 6056 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 6057 if (isId(Id) && peekToken().is(Kind)) { 6058 lex(); 6059 lex(); 6060 return true; 6061 } 6062 return false; 6063 } 6064 6065 bool 6066 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 6067 if (isToken(Kind)) { 6068 lex(); 6069 return true; 6070 } 6071 return false; 6072 } 6073 6074 bool 6075 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 6076 const StringRef ErrMsg) { 6077 if (!trySkipToken(Kind)) { 6078 Error(getLoc(), ErrMsg); 6079 return false; 6080 } 6081 return true; 6082 } 6083 6084 bool 6085 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) { 6086 SMLoc S = getLoc(); 6087 6088 const MCExpr *Expr; 6089 if (Parser.parseExpression(Expr)) 6090 return false; 6091 6092 if (Expr->evaluateAsAbsolute(Imm)) 6093 return true; 6094 6095 if (Expected.empty()) { 6096 Error(S, "expected absolute expression"); 6097 } else { 6098 Error(S, Twine("expected ", Expected) + 6099 Twine(" or an absolute expression")); 6100 } 6101 return false; 6102 } 6103 6104 bool 6105 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 6106 SMLoc S = getLoc(); 6107 6108 const MCExpr *Expr; 6109 if (Parser.parseExpression(Expr)) 6110 return false; 6111 6112 int64_t IntVal; 6113 if (Expr->evaluateAsAbsolute(IntVal)) { 6114 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 6115 } else { 6116 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 6117 } 6118 return true; 6119 } 6120 6121 bool 6122 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 6123 if (isToken(AsmToken::String)) { 6124 Val = getToken().getStringContents(); 
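// Note: getStringContents() returns the text between the quotes, so the
// surrounding '"' characters are not included in Val.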
6125 lex(); 6126 return true; 6127 } else { 6128 Error(getLoc(), ErrMsg); 6129 return false; 6130 } 6131 } 6132 6133 bool 6134 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 6135 if (isToken(AsmToken::Identifier)) { 6136 Val = getTokenStr(); 6137 lex(); 6138 return true; 6139 } else { 6140 Error(getLoc(), ErrMsg); 6141 return false; 6142 } 6143 } 6144 6145 AsmToken 6146 AMDGPUAsmParser::getToken() const { 6147 return Parser.getTok(); 6148 } 6149 6150 AsmToken 6151 AMDGPUAsmParser::peekToken() { 6152 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 6153 } 6154 6155 void 6156 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 6157 auto TokCount = getLexer().peekTokens(Tokens); 6158 6159 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 6160 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 6161 } 6162 6163 AsmToken::TokenKind 6164 AMDGPUAsmParser::getTokenKind() const { 6165 return getLexer().getKind(); 6166 } 6167 6168 SMLoc 6169 AMDGPUAsmParser::getLoc() const { 6170 return getToken().getLoc(); 6171 } 6172 6173 StringRef 6174 AMDGPUAsmParser::getTokenStr() const { 6175 return getToken().getString(); 6176 } 6177 6178 void 6179 AMDGPUAsmParser::lex() { 6180 Parser.Lex(); 6181 } 6182 6183 SMLoc 6184 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test, 6185 const OperandVector &Operands) const { 6186 for (unsigned i = Operands.size() - 1; i > 0; --i) { 6187 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6188 if (Test(Op)) 6189 return Op.getStartLoc(); 6190 } 6191 return ((AMDGPUOperand &)*Operands[0]).getStartLoc(); 6192 } 6193 6194 SMLoc 6195 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type, 6196 const OperandVector &Operands) const { 6197 auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); }; 6198 return getOperandLoc(Test, Operands); 6199 } 6200 6201 SMLoc 6202 AMDGPUAsmParser::getRegLoc(unsigned Reg, 6203 const OperandVector &Operands) const { 6204 auto Test = [=](const AMDGPUOperand& Op) { 6205 return Op.isRegKind() && Op.getReg() == Reg; 6206 }; 6207 return getOperandLoc(Test, Operands); 6208 } 6209 6210 SMLoc 6211 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const { 6212 auto Test = [](const AMDGPUOperand& Op) { 6213 return Op.IsImmKindLiteral() || Op.isExpr(); 6214 }; 6215 return getOperandLoc(Test, Operands); 6216 } 6217 6218 SMLoc 6219 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const { 6220 auto Test = [](const AMDGPUOperand& Op) { 6221 return Op.isImmKindConst(); 6222 }; 6223 return getOperandLoc(Test, Operands); 6224 } 6225 6226 //===----------------------------------------------------------------------===// 6227 // swizzle 6228 //===----------------------------------------------------------------------===// 6229 6230 LLVM_READNONE 6231 static unsigned 6232 encodeBitmaskPerm(const unsigned AndMask, 6233 const unsigned OrMask, 6234 const unsigned XorMask) { 6235 using namespace llvm::AMDGPU::Swizzle; 6236 6237 return BITMASK_PERM_ENC | 6238 (AndMask << BITMASK_AND_SHIFT) | 6239 (OrMask << BITMASK_OR_SHIFT) | 6240 (XorMask << BITMASK_XOR_SHIFT); 6241 } 6242 6243 bool 6244 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op, 6245 const unsigned MinVal, 6246 const unsigned MaxVal, 6247 const StringRef ErrMsg, 6248 SMLoc &Loc) { 6249 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6250 return false; 6251 } 6252 Loc = Parser.getTok().getLoc(); 6253 if (!parseExpr(Op)) { 6254 return false; 6255 } 6256 if (Op < MinVal || Op > MaxVal) { 6257 
Error(Loc, ErrMsg); 6258 return false; 6259 } 6260 6261 return true; 6262 } 6263 6264 bool 6265 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 6266 const unsigned MinVal, 6267 const unsigned MaxVal, 6268 const StringRef ErrMsg) { 6269 SMLoc Loc; 6270 for (unsigned i = 0; i < OpNum; ++i) { 6271 if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc)) 6272 return false; 6273 } 6274 6275 return true; 6276 } 6277 6278 bool 6279 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 6280 using namespace llvm::AMDGPU::Swizzle; 6281 6282 int64_t Lane[LANE_NUM]; 6283 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 6284 "expected a 2-bit lane id")) { 6285 Imm = QUAD_PERM_ENC; 6286 for (unsigned I = 0; I < LANE_NUM; ++I) { 6287 Imm |= Lane[I] << (LANE_SHIFT * I); 6288 } 6289 return true; 6290 } 6291 return false; 6292 } 6293 6294 bool 6295 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 6296 using namespace llvm::AMDGPU::Swizzle; 6297 6298 SMLoc Loc; 6299 int64_t GroupSize; 6300 int64_t LaneIdx; 6301 6302 if (!parseSwizzleOperand(GroupSize, 6303 2, 32, 6304 "group size must be in the interval [2,32]", 6305 Loc)) { 6306 return false; 6307 } 6308 if (!isPowerOf2_64(GroupSize)) { 6309 Error(Loc, "group size must be a power of two"); 6310 return false; 6311 } 6312 if (parseSwizzleOperand(LaneIdx, 6313 0, GroupSize - 1, 6314 "lane id must be in the interval [0,group size - 1]", 6315 Loc)) { 6316 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 6317 return true; 6318 } 6319 return false; 6320 } 6321 6322 bool 6323 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6324 using namespace llvm::AMDGPU::Swizzle; 6325 6326 SMLoc Loc; 6327 int64_t GroupSize; 6328 6329 if (!parseSwizzleOperand(GroupSize, 6330 2, 32, 6331 "group size must be in the interval [2,32]", 6332 Loc)) { 6333 return false; 6334 } 6335 if (!isPowerOf2_64(GroupSize)) { 6336 Error(Loc, "group size must be a power of two"); 6337 return false; 6338 } 6339 6340 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6341 return true; 6342 } 6343 6344 bool 6345 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6346 using namespace llvm::AMDGPU::Swizzle; 6347 6348 SMLoc Loc; 6349 int64_t GroupSize; 6350 6351 if (!parseSwizzleOperand(GroupSize, 6352 1, 16, 6353 "group size must be in the interval [1,16]", 6354 Loc)) { 6355 return false; 6356 } 6357 if (!isPowerOf2_64(GroupSize)) { 6358 Error(Loc, "group size must be a power of two"); 6359 return false; 6360 } 6361 6362 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6363 return true; 6364 } 6365 6366 bool 6367 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6368 using namespace llvm::AMDGPU::Swizzle; 6369 6370 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6371 return false; 6372 } 6373 6374 StringRef Ctl; 6375 SMLoc StrLoc = Parser.getTok().getLoc(); 6376 if (!parseString(Ctl)) { 6377 return false; 6378 } 6379 if (Ctl.size() != BITMASK_WIDTH) { 6380 Error(StrLoc, "expected a 5-character mask"); 6381 return false; 6382 } 6383 6384 unsigned AndMask = 0; 6385 unsigned OrMask = 0; 6386 unsigned XorMask = 0; 6387 6388 for (size_t i = 0; i < Ctl.size(); ++i) { 6389 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6390 switch(Ctl[i]) { 6391 default: 6392 Error(StrLoc, "invalid mask"); 6393 return false; 6394 case '0': 6395 break; 6396 case '1': 6397 OrMask |= Mask; 6398 break; 6399 case 'p': 6400 AndMask |= Mask; 6401 break; 6402 case 'i': 6403 AndMask |= Mask; 6404 XorMask |= Mask; 6405 break; 6406 } 6407 } 6408 6409 Imm = 
encodeBitmaskPerm(AndMask, OrMask, XorMask); 6410 return true; 6411 } 6412 6413 bool 6414 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6415 6416 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6417 6418 if (!parseExpr(Imm, "a swizzle macro")) { 6419 return false; 6420 } 6421 if (!isUInt<16>(Imm)) { 6422 Error(OffsetLoc, "expected a 16-bit offset"); 6423 return false; 6424 } 6425 return true; 6426 } 6427 6428 bool 6429 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6430 using namespace llvm::AMDGPU::Swizzle; 6431 6432 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 6433 6434 SMLoc ModeLoc = Parser.getTok().getLoc(); 6435 bool Ok = false; 6436 6437 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6438 Ok = parseSwizzleQuadPerm(Imm); 6439 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6440 Ok = parseSwizzleBitmaskPerm(Imm); 6441 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6442 Ok = parseSwizzleBroadcast(Imm); 6443 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6444 Ok = parseSwizzleSwap(Imm); 6445 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6446 Ok = parseSwizzleReverse(Imm); 6447 } else { 6448 Error(ModeLoc, "expected a swizzle mode"); 6449 } 6450 6451 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 6452 } 6453 6454 return false; 6455 } 6456 6457 OperandMatchResultTy 6458 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6459 SMLoc S = Parser.getTok().getLoc(); 6460 int64_t Imm = 0; 6461 6462 if (trySkipId("offset")) { 6463 6464 bool Ok = false; 6465 if (skipToken(AsmToken::Colon, "expected a colon")) { 6466 if (trySkipId("swizzle")) { 6467 Ok = parseSwizzleMacro(Imm); 6468 } else { 6469 Ok = parseSwizzleOffset(Imm); 6470 } 6471 } 6472 6473 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6474 6475 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6476 } else { 6477 // Swizzle "offset" operand is optional. 6478 // If it is omitted, try parsing other optional operands. 6479 return parseOptionalOpr(Operands); 6480 } 6481 } 6482 6483 bool 6484 AMDGPUOperand::isSwizzle() const { 6485 return isImmTy(ImmTySwizzle); 6486 } 6487 6488 //===----------------------------------------------------------------------===// 6489 // VGPR Index Mode 6490 //===----------------------------------------------------------------------===// 6491 6492 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6493 6494 using namespace llvm::AMDGPU::VGPRIndexMode; 6495 6496 if (trySkipToken(AsmToken::RParen)) { 6497 return OFF; 6498 } 6499 6500 int64_t Imm = 0; 6501 6502 while (true) { 6503 unsigned Mode = 0; 6504 SMLoc S = Parser.getTok().getLoc(); 6505 6506 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6507 if (trySkipId(IdSymbolic[ModeId])) { 6508 Mode = 1 << ModeId; 6509 break; 6510 } 6511 } 6512 6513 if (Mode == 0) { 6514 Error(S, (Imm == 0)?
6515 "expected a VGPR index mode or a closing parenthesis" : 6516 "expected a VGPR index mode"); 6517 return UNDEF; 6518 } 6519 6520 if (Imm & Mode) { 6521 Error(S, "duplicate VGPR index mode"); 6522 return UNDEF; 6523 } 6524 Imm |= Mode; 6525 6526 if (trySkipToken(AsmToken::RParen)) 6527 break; 6528 if (!skipToken(AsmToken::Comma, 6529 "expected a comma or a closing parenthesis")) 6530 return UNDEF; 6531 } 6532 6533 return Imm; 6534 } 6535 6536 OperandMatchResultTy 6537 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6538 6539 using namespace llvm::AMDGPU::VGPRIndexMode; 6540 6541 int64_t Imm = 0; 6542 SMLoc S = Parser.getTok().getLoc(); 6543 6544 if (getLexer().getKind() == AsmToken::Identifier && 6545 Parser.getTok().getString() == "gpr_idx" && 6546 getLexer().peekTok().is(AsmToken::LParen)) { 6547 6548 Parser.Lex(); 6549 Parser.Lex(); 6550 6551 Imm = parseGPRIdxMacro(); 6552 if (Imm == UNDEF) 6553 return MatchOperand_ParseFail; 6554 6555 } else { 6556 if (getParser().parseAbsoluteExpression(Imm)) 6557 return MatchOperand_ParseFail; 6558 if (Imm < 0 || !isUInt<4>(Imm)) { 6559 Error(S, "invalid immediate: only 4-bit values are legal"); 6560 return MatchOperand_ParseFail; 6561 } 6562 } 6563 6564 Operands.push_back( 6565 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6566 return MatchOperand_Success; 6567 } 6568 6569 bool AMDGPUOperand::isGPRIdxMode() const { 6570 return isImmTy(ImmTyGprIdxMode); 6571 } 6572 6573 //===----------------------------------------------------------------------===// 6574 // sopp branch targets 6575 //===----------------------------------------------------------------------===// 6576 6577 OperandMatchResultTy 6578 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6579 6580 // Make sure we are not parsing something 6581 // that looks like a label or an expression but is not. 6582 // This will improve error messages. 6583 if (isRegister() || isModifier()) 6584 return MatchOperand_NoMatch; 6585 6586 if (!parseExpr(Operands)) 6587 return MatchOperand_ParseFail; 6588 6589 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6590 assert(Opr.isImm() || Opr.isExpr()); 6591 SMLoc Loc = Opr.getStartLoc(); 6592 6593 // Currently we do not support arbitrary expressions as branch targets. 6594 // Only labels and absolute expressions are accepted. 
6595 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6596 Error(Loc, "expected an absolute expression or a label"); 6597 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6598 Error(Loc, "expected a 16-bit signed jump offset"); 6599 } 6600 6601 return MatchOperand_Success; 6602 } 6603 6604 //===----------------------------------------------------------------------===// 6605 // Boolean holding registers 6606 //===----------------------------------------------------------------------===// 6607 6608 OperandMatchResultTy 6609 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6610 return parseReg(Operands); 6611 } 6612 6613 //===----------------------------------------------------------------------===// 6614 // mubuf 6615 //===----------------------------------------------------------------------===// 6616 6617 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6618 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6619 } 6620 6621 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6622 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6623 } 6624 6625 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC_1() const { 6626 return AMDGPUOperand::CreateImm(this, -1, SMLoc(), AMDGPUOperand::ImmTyGLC); 6627 } 6628 6629 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6630 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6631 } 6632 6633 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6634 const OperandVector &Operands, 6635 bool IsAtomic, 6636 bool IsAtomicReturn, 6637 bool IsLds) { 6638 bool IsLdsOpcode = IsLds; 6639 bool HasLdsModifier = false; 6640 OptionalImmIndexMap OptionalIdx; 6641 assert(IsAtomicReturn ? IsAtomic : true); 6642 unsigned FirstOperandIdx = 1; 6643 6644 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6645 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6646 6647 // Add the register arguments 6648 if (Op.isReg()) { 6649 Op.addRegOperands(Inst, 1); 6650 // Insert a tied src for atomic return dst. 6651 // This cannot be postponed as subsequent calls to 6652 // addImmOperands rely on correct number of MC operands. 6653 if (IsAtomicReturn && i == FirstOperandIdx) 6654 Op.addRegOperands(Inst, 1); 6655 continue; 6656 } 6657 6658 // Handle the case where soffset is an immediate 6659 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6660 Op.addImmOperands(Inst, 1); 6661 continue; 6662 } 6663 6664 HasLdsModifier |= Op.isLDS(); 6665 6666 // Handle tokens like 'offen' which are sometimes hard-coded into the 6667 // asm string. There are no MCInst operands for these. 6668 if (Op.isToken()) { 6669 continue; 6670 } 6671 assert(Op.isImm()); 6672 6673 // Handle optional arguments 6674 OptionalIdx[Op.getImmTy()] = i; 6675 } 6676 6677 // This is a workaround for an llvm quirk which may result in an 6678 // incorrect instruction selection. Lds and non-lds versions of 6679 // MUBUF instructions are identical except that lds versions 6680 // have mandatory 'lds' modifier. However this modifier follows 6681 // optional modifiers and llvm asm matcher regards this 'lds' 6682 // modifier as an optional one. As a result, an lds version 6683 // of opcode may be selected even if it has no 'lds' modifier. 6684 if (IsLdsOpcode && !HasLdsModifier) { 6685 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6686 if (NoLdsOpcode != -1) { // Got lds version - correct it. 
6687 Inst.setOpcode(NoLdsOpcode); 6688 IsLdsOpcode = false; 6689 } 6690 } 6691 6692 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6693 if (!IsAtomic || IsAtomicReturn) { 6694 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6695 } 6696 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6697 6698 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6699 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6700 } 6701 6702 if (isGFX10Plus()) 6703 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6704 } 6705 6706 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6707 OptionalImmIndexMap OptionalIdx; 6708 6709 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6710 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6711 6712 // Add the register arguments 6713 if (Op.isReg()) { 6714 Op.addRegOperands(Inst, 1); 6715 continue; 6716 } 6717 6718 // Handle the case where soffset is an immediate 6719 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6720 Op.addImmOperands(Inst, 1); 6721 continue; 6722 } 6723 6724 // Handle tokens like 'offen' which are sometimes hard-coded into the 6725 // asm string. There are no MCInst operands for these. 6726 if (Op.isToken()) { 6727 continue; 6728 } 6729 assert(Op.isImm()); 6730 6731 // Handle optional arguments 6732 OptionalIdx[Op.getImmTy()] = i; 6733 } 6734 6735 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6736 AMDGPUOperand::ImmTyOffset); 6737 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6738 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6739 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6741 6742 if (isGFX10Plus()) 6743 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6744 } 6745 6746 //===----------------------------------------------------------------------===// 6747 // mimg 6748 //===----------------------------------------------------------------------===// 6749 6750 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6751 bool IsAtomic) { 6752 unsigned I = 1; 6753 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6754 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6755 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6756 } 6757 6758 if (IsAtomic) { 6759 // Add src, same as dst 6760 assert(Desc.getNumDefs() == 1); 6761 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6762 } 6763 6764 OptionalImmIndexMap OptionalIdx; 6765 6766 for (unsigned E = Operands.size(); I != E; ++I) { 6767 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6768 6769 // Add the register arguments 6770 if (Op.isReg()) { 6771 Op.addRegOperands(Inst, 1); 6772 } else if (Op.isImmModifier()) { 6773 OptionalIdx[Op.getImmTy()] = I; 6774 } else if (!Op.isToken()) { 6775 llvm_unreachable("unexpected operand type"); 6776 } 6777 } 6778 6779 bool IsGFX10Plus = isGFX10Plus(); 6780 6781 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6782 if (IsGFX10Plus) 6783 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6784 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6785 if (IsGFX10Plus) 6786 addOptionalImmOperand(Inst, Operands, OptionalIdx, 
AMDGPUOperand::ImmTyDLC); 6787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6789 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6790 if (IsGFX10Plus) 6791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6792 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6793 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6794 if (!IsGFX10Plus) 6795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6796 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6797 } 6798 6799 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6800 cvtMIMG(Inst, Operands, true); 6801 } 6802 6803 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6804 const OperandVector &Operands) { 6805 for (unsigned I = 1; I < Operands.size(); ++I) { 6806 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6807 if (Operand.isReg()) 6808 Operand.addRegOperands(Inst, 1); 6809 } 6810 6811 Inst.addOperand(MCOperand::createImm(1)); // a16 6812 } 6813 6814 //===----------------------------------------------------------------------===// 6815 // smrd 6816 //===----------------------------------------------------------------------===// 6817 6818 bool AMDGPUOperand::isSMRDOffset8() const { 6819 return isImm() && isUInt<8>(getImm()); 6820 } 6821 6822 bool AMDGPUOperand::isSMEMOffset() const { 6823 return isImm(); // Offset range is checked later by validator. 6824 } 6825 6826 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6827 // 32-bit literals are only supported on CI and we only want to use them 6828 // when the offset is > 8-bits. 6829 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6830 } 6831 6832 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6833 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6834 } 6835 6836 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6837 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6838 } 6839 6840 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6841 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6842 } 6843 6844 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6845 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6846 } 6847 6848 //===----------------------------------------------------------------------===// 6849 // vop3 6850 //===----------------------------------------------------------------------===// 6851 6852 static bool ConvertOmodMul(int64_t &Mul) { 6853 if (Mul != 1 && Mul != 2 && Mul != 4) 6854 return false; 6855 6856 Mul >>= 1; 6857 return true; 6858 } 6859 6860 static bool ConvertOmodDiv(int64_t &Div) { 6861 if (Div == 1) { 6862 Div = 0; 6863 return true; 6864 } 6865 6866 if (Div == 2) { 6867 Div = 3; 6868 return true; 6869 } 6870 6871 return false; 6872 } 6873 6874 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6875 if (BoundCtrl == 0) { 6876 BoundCtrl = 1; 6877 return true; 6878 } 6879 6880 if (BoundCtrl == -1) { 6881 BoundCtrl = 0; 6882 return true; 6883 } 6884 6885 return false; 6886 } 6887 6888 // Note: the order in this table matches the order of operands in AsmString. 
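// Each entry is {assembler name, immediate type, IsBit, optional conversion
// callback}. IsBit entries ("gds", "glc", "clamp", ...) are parsed by
// parseNamedBit as presence/absence flags; the remaining entries are
// dispatched by parseOptionalOpr to the appropriate prefixed-value parser,
// with the callback (e.g. ConvertOmodMul, ConvertBoundCtrl) post-processing
// the parsed value.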
6889 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6890 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6891 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6892 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6893 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6894 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6895 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6896 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6897 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6898 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6899 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6900 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6901 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6902 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6903 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6904 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6905 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6906 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6907 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6908 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6909 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6910 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6911 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6912 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6913 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6914 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6915 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6916 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6917 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6918 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6919 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6920 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6921 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6922 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6923 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6924 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6925 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6926 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6927 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6928 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6929 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6930 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6931 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6932 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6933 }; 6934 6935 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6936 6937 OperandMatchResultTy res = parseOptionalOpr(Operands); 6938 6939 // This is a hack to enable hardcoded mandatory operands which follow 6940 // optional operands. 6941 // 6942 // Current design assumes that all operands after the first optional operand 6943 // are also optional. However implementation of some instructions violates 6944 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6945 // 6946 // To alleviate this problem, we have to (implicitly) parse extra operands 6947 // to make sure autogenerated parser of custom operands never hit hardcoded 6948 // mandatory operands. 
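// The loop below performs that extra parsing: once one optional operand has
// been parsed successfully, keep consuming an optional comma plus another
// optional operand (bounded by MAX_OPR_LOOKAHEAD), so that the autogenerated
// parser of custom operands never lands on a hardcoded mandatory operand.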
6949 6950 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6951 if (res != MatchOperand_Success || 6952 isToken(AsmToken::EndOfStatement)) 6953 break; 6954 6955 trySkipToken(AsmToken::Comma); 6956 res = parseOptionalOpr(Operands); 6957 } 6958 6959 return res; 6960 } 6961 6962 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6963 OperandMatchResultTy res; 6964 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6965 // try to parse any optional operand here 6966 if (Op.IsBit) { 6967 res = parseNamedBit(Op.Name, Operands, Op.Type); 6968 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6969 res = parseOModOperand(Operands); 6970 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6971 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6972 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6973 res = parseSDWASel(Operands, Op.Name, Op.Type); 6974 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6975 res = parseSDWADstUnused(Operands); 6976 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6977 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6978 Op.Type == AMDGPUOperand::ImmTyNegLo || 6979 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6980 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6981 Op.ConvertResult); 6982 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6983 res = parseDim(Operands); 6984 } else { 6985 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6986 } 6987 if (res != MatchOperand_NoMatch) { 6988 return res; 6989 } 6990 } 6991 return MatchOperand_NoMatch; 6992 } 6993 6994 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6995 StringRef Name = Parser.getTok().getString(); 6996 if (Name == "mul") { 6997 return parseIntWithPrefix("mul", Operands, 6998 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6999 } 7000 7001 if (Name == "div") { 7002 return parseIntWithPrefix("div", Operands, 7003 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 7004 } 7005 7006 return MatchOperand_NoMatch; 7007 } 7008 7009 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 7010 cvtVOP3P(Inst, Operands); 7011 7012 int Opc = Inst.getOpcode(); 7013 7014 int SrcNum; 7015 const int Ops[] = { AMDGPU::OpName::src0, 7016 AMDGPU::OpName::src1, 7017 AMDGPU::OpName::src2 }; 7018 for (SrcNum = 0; 7019 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 7020 ++SrcNum); 7021 assert(SrcNum > 0); 7022 7023 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 7024 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 7025 7026 if ((OpSel & (1 << SrcNum)) != 0) { 7027 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 7028 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 7029 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 7030 } 7031 } 7032 7033 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 7034 // 1. This operand is input modifiers 7035 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 7036 // 2. This is not last operand 7037 && Desc.NumOperands > (OpNum + 1) 7038 // 3. Next operand is register class 7039 && Desc.OpInfo[OpNum + 1].RegClass != -1 7040 // 4. 
Next register is not tied to any other operand 7041 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 7042 } 7043 7044 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 7045 { 7046 OptionalImmIndexMap OptionalIdx; 7047 unsigned Opc = Inst.getOpcode(); 7048 7049 unsigned I = 1; 7050 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7051 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7052 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7053 } 7054 7055 for (unsigned E = Operands.size(); I != E; ++I) { 7056 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7057 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7058 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7059 } else if (Op.isInterpSlot() || 7060 Op.isInterpAttr() || 7061 Op.isAttrChan()) { 7062 Inst.addOperand(MCOperand::createImm(Op.getImm())); 7063 } else if (Op.isImmModifier()) { 7064 OptionalIdx[Op.getImmTy()] = I; 7065 } else { 7066 llvm_unreachable("unhandled operand type"); 7067 } 7068 } 7069 7070 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 7071 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 7072 } 7073 7074 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7075 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7076 } 7077 7078 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7080 } 7081 } 7082 7083 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 7084 OptionalImmIndexMap &OptionalIdx) { 7085 unsigned Opc = Inst.getOpcode(); 7086 7087 unsigned I = 1; 7088 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7089 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7090 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7091 } 7092 7093 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 7094 // This instruction has src modifiers 7095 for (unsigned E = Operands.size(); I != E; ++I) { 7096 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7097 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7098 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 7099 } else if (Op.isImmModifier()) { 7100 OptionalIdx[Op.getImmTy()] = I; 7101 } else if (Op.isRegOrImm()) { 7102 Op.addRegOrImmOperands(Inst, 1); 7103 } else { 7104 llvm_unreachable("unhandled operand type"); 7105 } 7106 } 7107 } else { 7108 // No src modifiers 7109 for (unsigned E = Operands.size(); I != E; ++I) { 7110 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7111 if (Op.isMod()) { 7112 OptionalIdx[Op.getImmTy()] = I; 7113 } else { 7114 Op.addRegOrImmOperands(Inst, 1); 7115 } 7116 } 7117 } 7118 7119 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 7120 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 7121 } 7122 7123 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 7124 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 7125 } 7126 7127 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 7128 // it has src2 register operand that is tied to dst operand 7129 // we don't allow modifiers for this operand in assembler so src2_modifiers 7130 // should be 0. 
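// For the opcodes listed below, two operands are synthesized: an explicit
// src2_modifiers immediate of 0 is inserted at the expected position, followed
// by a copy of operand 0 (the dst register) serving as the tied src2.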
7131 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 7132 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 7133 Opc == AMDGPU::V_MAC_F32_e64_vi || 7134 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 || 7135 Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 || 7136 Opc == AMDGPU::V_MAC_F16_e64_vi || 7137 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 7138 Opc == AMDGPU::V_FMAC_F32_e64_vi || 7139 Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 || 7140 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 7141 auto it = Inst.begin(); 7142 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 7143 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 7144 ++it; 7145 // Copy the operand to ensure it's not invalidated when Inst grows. 7146 Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst 7147 } 7148 } 7149 7150 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 7151 OptionalImmIndexMap OptionalIdx; 7152 cvtVOP3(Inst, Operands, OptionalIdx); 7153 } 7154 7155 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 7156 const OperandVector &Operands) { 7157 OptionalImmIndexMap OptIdx; 7158 const int Opc = Inst.getOpcode(); 7159 const MCInstrDesc &Desc = MII.get(Opc); 7160 7161 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 7162 7163 cvtVOP3(Inst, Operands, OptIdx); 7164 7165 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 7166 assert(!IsPacked); 7167 Inst.addOperand(Inst.getOperand(0)); 7168 } 7169 7170 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 7171 // instruction, and then figure out where to actually put the modifiers 7172 7173 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 7174 7175 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 7176 if (OpSelHiIdx != -1) { 7177 int DefaultVal = IsPacked ? 
  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31) ||
           (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
           (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
  }
  return false;
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isBLGP() const {
  return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
}

bool AMDGPUOperand::isCBSZ() const {
  return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
}

bool AMDGPUOperand::isABID() const {
  return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
}
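// Illustrative MFMA source line using the 3-bit blgp/cbsz and 4-bit abid
// modifiers validated above (assumed syntax):
//   v_mfma_f32_32x32x1f32 a[0:31], v0, v1, a[0:31] cbsz:3 abid:2 blgp:1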
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_NoMatch;
  if (getLexer().getTok().getString() != "dim")
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();

  // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an
  // integer.
  std::string Token;
  if (getLexer().is(AsmToken::Integer)) {
    SMLoc Loc = getLexer().getTok().getEndLoc();
    Token = std::string(getLexer().getTok().getString());
    Parser.Lex();
    if (getLexer().getTok().getLoc() != Loc)
      return MatchOperand_ParseFail;
  }
  if (getLexer().isNot(AsmToken::Identifier))
    return MatchOperand_ParseFail;
  Token += getLexer().getTok().getString();

  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.substr(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return MatchOperand_ParseFail;

  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S,
                                              AMDGPUOperand::ImmTyDim));
  return MatchOperand_Success;
}

OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix != "dpp8")
    return parseDPPCtrl(Operands);
  if (!isGFX10Plus())
    return MatchOperand_NoMatch;

  // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]

  int64_t Sels[8];

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getParser().parseAbsoluteExpression(Sels[0]))
    return MatchOperand_ParseFail;
  if (0 > Sels[0] || 7 < Sels[0])
    return MatchOperand_ParseFail;

  for (size_t i = 1; i < 8; ++i) {
    if (getLexer().isNot(AsmToken::Comma))
      return MatchOperand_ParseFail;

    Parser.Lex();
    if (getParser().parseAbsoluteExpression(Sels[i]))
      return MatchOperand_ParseFail;
    if (0 > Sels[i] || 7 < Sels[i])
      return MatchOperand_ParseFail;
  }

  if (getLexer().isNot(AsmToken::RBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned DPP8 = 0;
  for (size_t i = 0; i < 8; ++i)
    DPP8 |= (Sels[i] << (i * 3));

  Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
  return MatchOperand_Success;
}
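// Illustrative dpp8 operand accepted by parseDPP8: dpp8:[7,6,5,4,3,2,1,0]
// Each of the eight selects is a lane index in [0,7]; they are packed three
// bits apiece into a single ImmTyDPP8 immediate.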
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast"
        && Prefix != "row_share"
        && Prefix != "row_xmask") {
      return MatchOperand_NoMatch;
    }

    if (!isGFX10Plus() && (Prefix == "row_share" || Prefix == "row_xmask"))
      return MatchOperand_NoMatch;

    if (!isVI() && !isGFX9() &&
        (Prefix == "wave_shl" || Prefix == "wave_shr" ||
         Prefix == "wave_rol" || Prefix == "wave_ror" ||
         Prefix == "row_bcast"))
      return MatchOperand_NoMatch;

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHARE_FIRST;
      } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_XMASK_FIRST;
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}
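// Illustrative dpp control operands accepted by parseDPPCtrl:
//   quad_perm:[0,1,2,3]   row_shl:1   row_mirror   row_share:5 (gfx10+)
// Each spelling is folded into a single ImmTyDppCtrl immediate.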
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
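// Note: when these optional DPP operands are omitted from the source,
// row_mask and bank_mask default to 0xf (all rows and banks enabled) while
// bound_ctrl and fi default to 0; cvtDPP below fills in the same defaults.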
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi ? DPP8_FI_1 : DPP8_FI_0));
  } else {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("BYTE_0", SdwaSel::BYTE_0)
            .Case("BYTE_1", SdwaSel::BYTE_1)
            .Case("BYTE_2", SdwaSel::BYTE_2)
            .Case("BYTE_3", SdwaSel::BYTE_3)
            .Case("WORD_0", SdwaSel::WORD_0)
            .Case("WORD_1", SdwaSel::WORD_1)
            .Case("DWORD", SdwaSel::DWORD)
            .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
            .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
            .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
            .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
            .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}

void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
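// Illustrative sdwa source line using the selectors parsed above (assumed
// syntax):
//   v_add_f32_sdwa v0, v1, v2 dst_sel:BYTE_0 dst_unused:UNUSED_PAD src0_sel:WORD_1
// The BYTE_*/WORD_*/DWORD and UNUSED_* names become SdwaSel/DstUnused
// immediates that cvtSDWA below places into the MCInst.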
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa (vi/gfx9/gfx10) has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }
  // Special case v_mac_{f16, f32}:
  // it has a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

//===----------------------------------------------------------------------===//
// mAI
//===----------------------------------------------------------------------===//

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}

/// Force static initialization.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#define GET_MNEMONIC_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that we
// have the MatchClassKind enum defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects a token and fails to validate the
  // operand. This method checks whether we were given an immediate operand
  // when the corresponding token was expected.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  case MCK_ImmSMEMOffset:
    return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
  case MCK_SReg_64:
  case MCK_SReg_64_XEXEC:
    // Null is defined as a 32-bit register but
    // it should also be enabled with 64-bit operands.
    // The following code enables it for SReg_64 operands
    // used as source and destination. Remaining source
    // operands are handled in isInlinableImm.
    return Operand.isNull() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
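// Illustrative use of the optional endpgm immediate: both "s_endpgm" and
// "s_endpgm 1" are accepted; the value defaults to 0 and must fit in 16 bits.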