//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

// Classification of a parsed register reference.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

/// A single parsed AMDGPU assembly operand: a raw token, an immediate,
/// a register, or a not-yet-resolved expression.
class AMDGPUOperand : public MCParsedAsmOperand {
  // Discriminator for the payload union below.
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser; // Parser that created this operand.

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
    : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  // Source operand modifiers. Abs/Neg are floating-point modifiers and Sext
  // is an integer modifier; getModifiersOperand() asserts the two groups are
  // never combined on one operand.
  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

    // Encode FP modifiers as an SISrcMods bitmask.
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    // Encode integer modifiers as an SISrcMods bitmask.
    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    // Encode whichever modifier group is present (0 if none).
    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  // Kinds of named immediate operands (instruction flags, offsets, DPP/SDWA
  // controls, etc.). Stored in ImmOp::Type and tested via isImmTy().
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  // Payload representations for each operand kind.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Operand payload, discriminated by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // True if this is a register in any vector-register class.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // Named-immediate predicates; several also range-check the value.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); }
  bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  // Scalar (SGPR) source-operand predicates.
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  // VALU (VGPR-or-SGPR) source-operand predicates.
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isVSrcB16();
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isVSrcF16();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPPCtrl() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  // For an Expression operand acting as a token (see isToken()), the token
  // text is the referenced symbol's name.
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers are only meaningful on registers and plain (ImmTyNone)
  // immediates; both accessors assert that.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  // Emits the modifier bitmask operand first, then the source operand itself.
  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  // Pretty-print an ImmTy for debugging (used by print() below).
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyEndpgm:
      OS << "Endpgm";
      break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  // Factory helpers used by the parser to build operands.
  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  // NOTE: HasExplicitEncodingSize is currently unused by this factory.
  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  // Smallest register index (per kind) known to be unused so far; -1 means
  // no register of that kind has been seen yet.
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  // Record that SGPR index i is used; publishes the running count through
  // the ".kernel.sgpr_count" symbol when a context is available.
  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  // Same as usesSgprAt, but for VGPRs (".kernel.vgpr_count").
  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  // Reset counters to "nothing used" and (re)publish zero counts for the
  // new kernel scope.
  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  // Note a use of RegWidth consecutive registers starting at DwordRegIndex.
  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
      case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
      case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
      default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
868 /// 869 /// \param Features [in] Target features, used for bug corrections. 870 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 871 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 872 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 873 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 874 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 875 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 876 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 877 /// \param VGPRBlocks [out] Result VGPR block count. 878 /// \param SGPRBlocks [out] Result SGPR block count. 879 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 880 bool FlatScrUsed, bool XNACKUsed, 881 unsigned NextFreeVGPR, SMRange VGPRRange, 882 unsigned NextFreeSGPR, SMRange SGPRRange, 883 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 884 bool ParseDirectiveAMDGCNTarget(); 885 bool ParseDirectiveAMDHSAKernel(); 886 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 887 bool ParseDirectiveHSACodeObjectVersion(); 888 bool ParseDirectiveHSACodeObjectISA(); 889 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 890 bool ParseDirectiveAMDKernelCodeT(); 891 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 892 bool ParseDirectiveAMDGPUHsaKernel(); 893 894 bool ParseDirectiveISAVersion(); 895 bool ParseDirectiveHSAMetadata(); 896 bool ParseDirectivePALMetadataBegin(); 897 bool ParseDirectivePALMetadata(); 898 899 /// Common code to parse out a block of text (typically YAML) between start and 900 /// end directives. 
901 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 902 const char *AssemblerDirectiveEnd, 903 std::string &CollectString); 904 905 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 906 RegisterKind RegKind, unsigned Reg1, 907 unsigned RegNum); 908 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 909 unsigned& RegNum, unsigned& RegWidth, 910 unsigned *DwordRegIndex); 911 bool isRegister(); 912 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 913 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 914 void initializeGprCountSymbol(RegisterKind RegKind); 915 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 916 unsigned RegWidth); 917 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 918 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 919 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 920 bool IsGdsHardcoded); 921 922 public: 923 enum AMDGPUMatchResultTy { 924 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 925 }; 926 927 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 928 929 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 930 const MCInstrInfo &MII, 931 const MCTargetOptions &Options) 932 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 933 MCAsmParserExtension::Initialize(Parser); 934 935 if (getFeatureBits().none()) { 936 // Set default features. 937 copySTI().ToggleFeature("southern-islands"); 938 } 939 940 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 941 942 { 943 // TODO: make those pre-defined variables read-only. 944 // Currently there is none suitable machinery in the core llvm-mc for this. 945 // MCSymbol::isRedefinable is intended for another purpose, and 946 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 
947 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 948 MCContext &Ctx = getContext(); 949 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 950 MCSymbol *Sym = 951 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 952 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 953 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 954 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 955 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 956 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 957 } else { 958 MCSymbol *Sym = 959 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 960 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 961 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 962 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 963 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 964 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 965 } 966 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 967 initializeGprCountSymbol(IS_VGPR); 968 initializeGprCountSymbol(IS_SGPR); 969 } else 970 KernelScope.initialize(getContext()); 971 } 972 } 973 974 bool hasXNACK() const { 975 return AMDGPU::hasXNACK(getSTI()); 976 } 977 978 bool hasMIMG_R128() const { 979 return AMDGPU::hasMIMG_R128(getSTI()); 980 } 981 982 bool hasPackedD16() const { 983 return AMDGPU::hasPackedD16(getSTI()); 984 } 985 986 bool isSI() const { 987 return AMDGPU::isSI(getSTI()); 988 } 989 990 bool isCI() const { 991 return AMDGPU::isCI(getSTI()); 992 } 993 994 bool isVI() const { 995 return AMDGPU::isVI(getSTI()); 996 } 997 998 bool isGFX9() const { 999 return AMDGPU::isGFX9(getSTI()); 1000 } 1001 1002 bool hasInv2PiInlineImm() const { 1003 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1004 } 1005 1006 bool hasFlatOffsets() const { 1007 return 
getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1008 } 1009 1010 bool hasSGPR102_SGPR103() const { 1011 return !isVI(); 1012 } 1013 1014 bool hasIntClamp() const { 1015 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1016 } 1017 1018 AMDGPUTargetStreamer &getTargetStreamer() { 1019 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1020 return static_cast<AMDGPUTargetStreamer &>(TS); 1021 } 1022 1023 const MCRegisterInfo *getMRI() const { 1024 // We need this const_cast because for some reason getContext() is not const 1025 // in MCAsmParser. 1026 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1027 } 1028 1029 const MCInstrInfo *getMII() const { 1030 return &MII; 1031 } 1032 1033 const FeatureBitset &getFeatureBits() const { 1034 return getSTI().getFeatureBits(); 1035 } 1036 1037 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1038 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1039 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1040 1041 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1042 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1043 bool isForcedDPP() const { return ForcedDPP; } 1044 bool isForcedSDWA() const { return ForcedSDWA; } 1045 ArrayRef<unsigned> getMatchedVariants() const; 1046 1047 std::unique_ptr<AMDGPUOperand> parseRegister(); 1048 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1049 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1050 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1051 unsigned Kind) override; 1052 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1053 OperandVector &Operands, MCStreamer &Out, 1054 uint64_t &ErrorInfo, 1055 bool MatchingInlineAsm) override; 1056 bool ParseDirective(AsmToken DirectiveID) override; 1057 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1058 StringRef 
parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  // Parse an integer immediate introduced by a named prefix (e.g.
  // "offset:16").  The OperandVector overloads push an immediate operand
  // of the given ImmTy and may post-process the value via ConvertResult.
  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy parseOperandArrayWithPrefix(
    const char *Prefix,
    OperandVector &Operands,
    AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
    bool (*ConvertResult)(int64_t&) = nullptr);

  // Parse a single-bit named modifier (present/absent), e.g. "gds".
  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false);
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  // cvtDS and cvtDSGds forward to cvtDSImpl with a different flag value.
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false);
}
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  // Holds an operand id parsed for sendmsg/hwreg constructs, plus whether it
  // was written symbolically (by name) rather than as a number.
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
  bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);

  // Semantic checks performed on a constructed MCInst.
  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  // Low-level token inspection and consumption helpers.
  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void
peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);

  // Parsers for the swizzle(...) operand and its sub-macros.
  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  // MUBUF conversions: the boolean flags select the atomic,
  // atomic-with-return and LDS variants of the shared implementation.
  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  // Factories for default (omitted) optional operands.
  AMDGPUOperand::Ptr defaultGLC()
const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultOffsetU12() const;
  AMDGPUOperand::Ptr defaultOffsetS13() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

// Describes one optional named operand accepted by a mnemonic.
struct OptionalOperand {
  const char *Name;
AMDGPUOperand::ImmTy Type;
  bool IsBit;
  // Optional post-parse adjustment applied to the parsed value.
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

// Map an SI operand type to the float semantics of its immediate encoding.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// True if Val fits in Size bits either zero- or sign-extended.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrower operand: the fp literal must convert without overflow or
    // underflow before checking inlinability at the target width.
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

// SDWA operand: on VI only a 32-bit VGPR; on GFX9 also any VS_32 register
// or an inlinable immediate of the given type.
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

// Apply SP3 abs/neg modifiers to the raw bits of an fp immediate of the
// given byte size by clearing or flipping its sign bit.
uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    // Source operand: may need literal encoding and fp-modifier folding.
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto&
InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high 32 bits of the literal can be encoded.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them.
// This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    // Packed operands accept only inlinable values; predicate methods
    // should have rejected anything else before we get here.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

// Append a K-immediate (kimm) fp operand of the given bitwidth.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got int literal token.
Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}

void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}

// True for the named source registers that read as inline constants.
static bool isInlineValue(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return true;
  default:
    return false;
  }
}

bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Map a register kind and width (in 32-bit registers) to a register class
// id, or -1 if no class of that width exists.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  }
  return -1;
}

// Map a special-register spelling to its MC register number; returns 0
// (no register) for names that are not special registers.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("scc", AMDGPU::SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Default(0);
}

bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}

// Try to extend a register list that currently ends at Reg (width RegWidth)
// with the next parsed register Reg1.  For special registers this merges
// LO/HI halves into the combined register; for VGPR/SGPR/TTMP the next
// register must be consecutive.
bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                                            RegisterKind RegKind, unsigned Reg1,
                                            unsigned RegNum) {
  switch (RegKind) {
  case IS_SPECIAL:
    if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
      Reg = AMDGPU::EXEC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
      Reg = AMDGPU::FLAT_SCR;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
      Reg = AMDGPU::XNACK_MASK;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
      Reg = AMDGPU::VCC;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
      Reg = AMDGPU::TBA;
      RegWidth = 2;
      return true;
    }
    if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
      Reg = AMDGPU::TMA;
      RegWidth = 2;
      return true;
    }
    return false;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
    if (Reg1 != Reg + RegWidth) {
      return false;
    }
    RegWidth++;
    return true;
  default:
    llvm_unreachable("unexpected register kind");
  }
}

// Prefixes of generic (numbered) register names.
static const StringRef Registers[] = {
  { "v" },
  { "s" },
  { "ttmp" },
};

bool
AMDGPUAsmParser::isRegister(const AsmToken &Token,
                            const AsmToken &NextToken) const {

  // A list of consecutive registers: [s0,s1,s2,s3]
  if (Token.is(AsmToken::LBrac))
    return true;

  if (!Token.is(AsmToken::Identifier))
    return false;

  // A single register like s0 or a range of registers like s[0:1]

  StringRef RegName = Token.getString();
for (StringRef Reg : Registers) {
    if (RegName.startswith(Reg)) {
      if (Reg.size() < RegName.size()) {
        unsigned RegNum;
        // A single register with an index: rXX
        if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum))
          return true;
      } else {
        // A range of registers: r[XX:YY].
        if (NextToken.is(AsmToken::LBrac))
          return true;
      }
    }
  }

  // Implicit unsigned->bool: nonzero special register numbers mean "yes".
  return getSpecialRegForName(RegName);
}

bool
AMDGPUAsmParser::isRegister()
{
  return isRegister(getToken(), peekToken());
}

// Parse one register reference: a special register by name, a single
// register (v0), a range (v[0:3]) or a bracketed list ([v0,v1,...]).
// On success fills in the kind, first MC register, register number and
// width (in 32-bit registers); DwordRegIndex (if non-null) receives the
// unscaled first dword index.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          unsigned *DwordRegIndex) {
  if (DwordRegIndex) { *DwordRegIndex = 0; }
  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (getLexer().is(AsmToken::Identifier)) {
    StringRef RegName = Parser.getTok().getString();
    if ((Reg = getSpecialRegForName(RegName))) {
      Parser.Lex();
      RegKind = IS_SPECIAL;
    } else {
      unsigned RegNumIndex = 0;
      if (RegName[0] == 'v') {
        RegNumIndex = 1;
        RegKind = IS_VGPR;
      } else if (RegName[0] == 's') {
        RegNumIndex = 1;
        RegKind = IS_SGPR;
      } else if (RegName.startswith("ttmp")) {
        RegNumIndex = strlen("ttmp");
        RegKind = IS_TTMP;
      } else {
        return false;
      }
      if (RegName.size() > RegNumIndex) {
        // Single 32-bit register: vXX.
        if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum))
          return false;
        Parser.Lex();
        RegWidth = 1;
      } else {
        // Range of registers: v[XX:YY]. ":YY" is optional.
Parser.Lex();
        int64_t RegLo, RegHi;
        if (getLexer().isNot(AsmToken::LBrac))
          return false;
        Parser.Lex();

        if (getParser().parseAbsoluteExpression(RegLo))
          return false;

        const bool isRBrace = getLexer().is(AsmToken::RBrac);
        if (!isRBrace && getLexer().isNot(AsmToken::Colon))
          return false;
        Parser.Lex();

        if (isRBrace) {
          // v[XX] is a one-register range.
          RegHi = RegLo;
        } else {
          if (getParser().parseAbsoluteExpression(RegHi))
            return false;

          if (getLexer().isNot(AsmToken::RBrac))
            return false;
          Parser.Lex();
        }
        RegNum = (unsigned) RegLo;
        RegWidth = (RegHi - RegLo) + 1;
      }
    }
  } else if (getLexer().is(AsmToken::LBrac)) {
    // List of consecutive registers: [s0,s1,s2,s3]
    Parser.Lex();
    if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
      return false;
    if (RegWidth != 1)
      return false;
    RegisterKind RegKind1;
    unsigned Reg1, RegNum1, RegWidth1;
    do {
      if (getLexer().is(AsmToken::Comma)) {
        Parser.Lex();
      } else if (getLexer().is(AsmToken::RBrac)) {
        Parser.Lex();
        break;
      } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
        // List elements must be single registers of the same kind, and
        // consecutive with what has been accumulated so far.
        if (RegWidth1 != 1) {
          return false;
        }
        if (RegKind1 != RegKind) {
          return false;
        }
        if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) {
          return false;
        }
      } else {
        return false;
      }
    } while (true);
  } else {
    return false;
  }
  switch (RegKind) {
  case IS_SPECIAL:
    RegNum = 0;
    RegWidth = 1;
    break;
  case IS_VGPR:
  case IS_SGPR:
  case IS_TTMP:
  {
    unsigned Size = 1;
    if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
      // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
Size = std::min(RegWidth, 4u);
    }
    if (RegNum % Size != 0)
      return false;
    if (DwordRegIndex) { *DwordRegIndex = RegNum; }
    // Scale the dword index down to an index into the register class.
    RegNum = RegNum / Size;
    int RCID = getRegClass(RegKind, RegWidth);
    if (RCID == -1)
      return false;
    const MCRegisterClass RC = TRI->getRegClass(RCID);
    if (RegNum >= RC.getNumRegs())
      return false;
    Reg = RC.getRegister(RegNum);
    break;
  }

  default:
    llvm_unreachable("unexpected register kind");
  }

  if (!subtargetHasRegister(*TRI, Reg))
    return false;
  return true;
}

// Name of the symbol tracking the next free VGPR/SGPR, or None for kinds
// that are not tracked.
Optional<StringRef>
AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
  switch (RegKind) {
  case IS_VGPR:
    return StringRef(".amdgcn.next_free_vgpr");
  case IS_SGPR:
    return StringRef(".amdgcn.next_free_sgpr");
  default:
    return None;
  }
}

void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}

// Raise the .amdgcn.next_free_{v,s}gpr symbol if this register use extends
// past its current value.  Returns false (after reporting an error) if the
// symbol has been redefined in an unusable way.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getParser().getTok().getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
getParser().getTok().getLoc(),
      ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}

std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
  const auto &Tok = Parser.getTok();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth, DwordRegIndex;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
    //FIXME: improve error messages (bug 41303).
    Error(StartLoc, "not a valid operand.");
    return nullptr;
  }
  // Record GPR usage: via symbols on code-object-v3, else via KernelScope.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}

bool
AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) {
  if (HasSP3AbsModifier) {
    // This is a workaround for handling expressions
    // as arguments of SP3 'abs' modifier, for example:
    //     |1.0|
    //     |-1|
    //     |1+x|
    // This syntax is not compatible with syntax of standard
    // MC expressions (due to the trailing '|').
SMLoc EndLoc;
    const MCExpr *Expr;
    SMLoc StartLoc = getLoc();

    // Parse only a primary expression so the trailing '|' is left alone.
    if (getParser().parsePrimaryExpr(Expr, EndLoc)) {
      return true;
    }

    if (!Expr->evaluateAsAbsolute(Val))
      return Error(StartLoc, "expected absolute expression");

    return false;
  }

  return getParser().parseAbsoluteExpression(Val);
}

OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
  // TODO: add syntactic sugar for 1/(2*PI)

  const auto& Tok = getToken();
  const auto& NextTok = peekToken();
  bool IsReal = Tok.is(AsmToken::Real);
  SMLoc S = Tok.getLoc();
  bool Negate = false;

  // Fold a leading minus into the fp literal that follows it.
  if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
    lex();
    IsReal = true;
    Negate = true;
  }

  if (IsReal) {
    // Floating-point expressions are not supported.
    // Can only allow floating-point literals with an
    // optional sign.
StringRef Num = getTokenStr();
    lex();

    APFloat RealVal(APFloat::IEEEdouble());
    auto roundMode = APFloat::rmNearestTiesToEven;
    if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) {
      return MatchOperand_ParseFail;
    }
    if (Negate)
      RealVal.changeSign();

    Operands.push_back(
      AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
                               AMDGPUOperand::ImmTyNone, true));

    return MatchOperand_Success;

  // FIXME: Should enable arbitrary expressions here
  } else if (Tok.is(AsmToken::Integer) ||
             (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){

    int64_t IntVal;
    if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier))
      return MatchOperand_ParseFail;

    Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

OperandMatchResultTy
AMDGPUAsmParser::parseReg(OperandVector &Operands) {
  if (!isRegister())
    return MatchOperand_NoMatch;

  if (auto R = parseRegister()) {
    assert(R->isReg());
    Operands.push_back(std::move(R));
    return MatchOperand_Success;
  }
  return MatchOperand_ParseFail;
}

OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
  auto res = parseReg(Operands);
  return (res == MatchOperand_NoMatch)?
    parseImm(Operands, HasSP3AbsMod) :
    res;
}

// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
2117 // 2118 // Note: When "-" is followed by an integer literal, 2119 // this is interpreted as integer negation rather 2120 // than a floating-point NEG modifier applied to N. 2121 // Beside being contr-intuitive, such use of floating-point 2122 // NEG modifier would have resulted in different meaning 2123 // of integer literals used with VOP1/2/C and VOP3, 2124 // for example: 2125 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2126 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2127 // Negative fp literals with preceding "-" are 2128 // handled likewise for unifomtity 2129 // 2130 bool 2131 AMDGPUAsmParser::parseSP3NegModifier() { 2132 2133 AsmToken NextToken[2]; 2134 peekTokens(NextToken); 2135 2136 if (isToken(AsmToken::Minus) && 2137 (isRegister(NextToken[0], NextToken[1]) || 2138 NextToken[0].is(AsmToken::Pipe) || 2139 isId(NextToken[0], "abs"))) { 2140 lex(); 2141 return true; 2142 } 2143 2144 return false; 2145 } 2146 2147 OperandMatchResultTy 2148 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2149 bool AllowImm) { 2150 bool Neg, SP3Neg; 2151 bool Abs, SP3Abs; 2152 SMLoc Loc; 2153 2154 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 
2155 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2156 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2157 return MatchOperand_ParseFail; 2158 } 2159 2160 SP3Neg = parseSP3NegModifier(); 2161 2162 Loc = getLoc(); 2163 Neg = trySkipId("neg"); 2164 if (Neg && SP3Neg) { 2165 Error(Loc, "expected register or immediate"); 2166 return MatchOperand_ParseFail; 2167 } 2168 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2169 return MatchOperand_ParseFail; 2170 2171 Abs = trySkipId("abs"); 2172 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2173 return MatchOperand_ParseFail; 2174 2175 Loc = getLoc(); 2176 SP3Abs = trySkipToken(AsmToken::Pipe); 2177 if (Abs && SP3Abs) { 2178 Error(Loc, "expected register or immediate"); 2179 return MatchOperand_ParseFail; 2180 } 2181 2182 OperandMatchResultTy Res; 2183 if (AllowImm) { 2184 Res = parseRegOrImm(Operands, SP3Abs); 2185 } else { 2186 Res = parseReg(Operands); 2187 } 2188 if (Res != MatchOperand_Success) { 2189 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2190 } 2191 2192 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2193 return MatchOperand_ParseFail; 2194 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2195 return MatchOperand_ParseFail; 2196 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2197 return MatchOperand_ParseFail; 2198 2199 AMDGPUOperand::Modifiers Mods; 2200 Mods.Abs = Abs || SP3Abs; 2201 Mods.Neg = Neg || SP3Neg; 2202 2203 if (Mods.hasFPModifiers()) { 2204 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2205 Op.setModifiers(Mods); 2206 } 2207 return MatchOperand_Success; 2208 } 2209 2210 OperandMatchResultTy 2211 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2212 bool AllowImm) { 2213 bool Sext = trySkipId("sext"); 2214 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2215 return MatchOperand_ParseFail; 2216 2217 OperandMatchResultTy Res; 2218 if (AllowImm) { 2219 Res = parseRegOrImm(Operands); 2220 } else { 2221 Res = parseReg(Operands); 2222 } 2223 if (Res != MatchOperand_Success) { 2224 return Sext? 
MatchOperand_ParseFail : Res; 2225 } 2226 2227 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2228 return MatchOperand_ParseFail; 2229 2230 AMDGPUOperand::Modifiers Mods; 2231 Mods.Sext = Sext; 2232 2233 if (Mods.hasIntModifiers()) { 2234 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2235 Op.setModifiers(Mods); 2236 } 2237 2238 return MatchOperand_Success; 2239 } 2240 2241 OperandMatchResultTy 2242 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2243 return parseRegOrImmWithFPInputMods(Operands, false); 2244 } 2245 2246 OperandMatchResultTy 2247 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2248 return parseRegOrImmWithIntInputMods(Operands, false); 2249 } 2250 2251 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2252 auto Loc = getLoc(); 2253 if (trySkipId("off")) { 2254 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2255 AMDGPUOperand::ImmTyOff, false)); 2256 return MatchOperand_Success; 2257 } 2258 2259 if (!isRegister()) 2260 return MatchOperand_NoMatch; 2261 2262 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2263 if (Reg) { 2264 Operands.push_back(std::move(Reg)); 2265 return MatchOperand_Success; 2266 } 2267 2268 return MatchOperand_ParseFail; 2269 2270 } 2271 2272 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2273 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2274 2275 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2276 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2277 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2278 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2279 return Match_InvalidOperand; 2280 2281 if ((TSFlags & SIInstrFlags::VOP3) && 2282 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2283 getForcedEncodingSize() != 64) 2284 return Match_PreferE32; 2285 2286 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 
2287 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2288 // v_mac_f32/16 allow only dst_sel == DWORD; 2289 auto OpNum = 2290 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2291 const auto &Op = Inst.getOperand(OpNum); 2292 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2293 return Match_InvalidOperand; 2294 } 2295 } 2296 2297 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2298 // FIXME: Produces error without correct column reported. 2299 auto OpNum = 2300 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2301 const auto &Op = Inst.getOperand(OpNum); 2302 if (Op.getImm() != 0) 2303 return Match_InvalidOperand; 2304 } 2305 2306 return Match_Success; 2307 } 2308 2309 // What asm variants we should check 2310 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2311 if (getForcedEncodingSize() == 32) { 2312 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2313 return makeArrayRef(Variants); 2314 } 2315 2316 if (isForcedVOP3()) { 2317 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2318 return makeArrayRef(Variants); 2319 } 2320 2321 if (isForcedSDWA()) { 2322 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2323 AMDGPUAsmVariants::SDWA9}; 2324 return makeArrayRef(Variants); 2325 } 2326 2327 if (isForcedDPP()) { 2328 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2329 return makeArrayRef(Variants); 2330 } 2331 2332 static const unsigned Variants[] = { 2333 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2334 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2335 }; 2336 2337 return makeArrayRef(Variants); 2338 } 2339 2340 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2341 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2342 const unsigned Num = Desc.getNumImplicitUses(); 2343 for (unsigned i = 0; i < Num; ++i) { 2344 unsigned Reg = Desc.ImplicitUses[i]; 2345 
switch (Reg) { 2346 case AMDGPU::FLAT_SCR: 2347 case AMDGPU::VCC: 2348 case AMDGPU::M0: 2349 return Reg; 2350 default: 2351 break; 2352 } 2353 } 2354 return AMDGPU::NoRegister; 2355 } 2356 2357 // NB: This code is correct only when used to check constant 2358 // bus limitations because GFX7 support no f16 inline constants. 2359 // Note that there are no cases when a GFX7 opcode violates 2360 // constant bus limitations due to the use of an f16 constant. 2361 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2362 unsigned OpIdx) const { 2363 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2364 2365 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2366 return false; 2367 } 2368 2369 const MCOperand &MO = Inst.getOperand(OpIdx); 2370 2371 int64_t Val = MO.getImm(); 2372 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2373 2374 switch (OpSize) { // expected operand size 2375 case 8: 2376 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2377 case 4: 2378 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2379 case 2: { 2380 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2381 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2382 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2383 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2384 } else { 2385 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2386 } 2387 } 2388 default: 2389 llvm_unreachable("invalid operand size"); 2390 } 2391 } 2392 2393 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2394 const MCOperand &MO = Inst.getOperand(OpIdx); 2395 if (MO.isImm()) { 2396 return !isInlineConstant(Inst, OpIdx); 2397 } 2398 return !MO.isReg() || 2399 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2400 } 2401 2402 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2403 const unsigned Opcode = Inst.getOpcode(); 2404 const MCInstrDesc &Desc = MII.get(Opcode); 
2405 unsigned ConstantBusUseCount = 0; 2406 2407 if (Desc.TSFlags & 2408 (SIInstrFlags::VOPC | 2409 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2410 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2411 SIInstrFlags::SDWA)) { 2412 // Check special imm operands (used by madmk, etc) 2413 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2414 ++ConstantBusUseCount; 2415 } 2416 2417 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2418 if (SGPRUsed != AMDGPU::NoRegister) { 2419 ++ConstantBusUseCount; 2420 } 2421 2422 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2423 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2424 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2425 2426 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2427 2428 for (int OpIdx : OpIndices) { 2429 if (OpIdx == -1) break; 2430 2431 const MCOperand &MO = Inst.getOperand(OpIdx); 2432 if (usesConstantBus(Inst, OpIdx)) { 2433 if (MO.isReg()) { 2434 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2435 // Pairs of registers with a partial intersections like these 2436 // s0, s[0:1] 2437 // flat_scratch_lo, flat_scratch 2438 // flat_scratch_lo, flat_scratch_hi 2439 // are theoretically valid but they are disabled anyway. 
2440 // Note that this code mimics SIInstrInfo::verifyInstruction 2441 if (Reg != SGPRUsed) { 2442 ++ConstantBusUseCount; 2443 } 2444 SGPRUsed = Reg; 2445 } else { // Expression or a literal 2446 ++ConstantBusUseCount; 2447 } 2448 } 2449 } 2450 } 2451 2452 return ConstantBusUseCount <= 1; 2453 } 2454 2455 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2456 const unsigned Opcode = Inst.getOpcode(); 2457 const MCInstrDesc &Desc = MII.get(Opcode); 2458 2459 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2460 if (DstIdx == -1 || 2461 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2462 return true; 2463 } 2464 2465 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2466 2467 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2468 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2469 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2470 2471 assert(DstIdx != -1); 2472 const MCOperand &Dst = Inst.getOperand(DstIdx); 2473 assert(Dst.isReg()); 2474 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2475 2476 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2477 2478 for (int SrcIdx : SrcIndices) { 2479 if (SrcIdx == -1) break; 2480 const MCOperand &Src = Inst.getOperand(SrcIdx); 2481 if (Src.isReg()) { 2482 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2483 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2484 return false; 2485 } 2486 } 2487 } 2488 2489 return true; 2490 } 2491 2492 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2493 2494 const unsigned Opc = Inst.getOpcode(); 2495 const MCInstrDesc &Desc = MII.get(Opc); 2496 2497 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2498 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2499 assert(ClampIdx != -1); 2500 return Inst.getOperand(ClampIdx).getImm() == 0; 2501 } 2502 2503 
return true; 2504 } 2505 2506 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2507 2508 const unsigned Opc = Inst.getOpcode(); 2509 const MCInstrDesc &Desc = MII.get(Opc); 2510 2511 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2512 return true; 2513 2514 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2515 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2516 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2517 2518 assert(VDataIdx != -1); 2519 assert(DMaskIdx != -1); 2520 assert(TFEIdx != -1); 2521 2522 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2523 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2524 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2525 if (DMask == 0) 2526 DMask = 1; 2527 2528 unsigned DataSize = 2529 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2530 if (hasPackedD16()) { 2531 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2532 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2533 DataSize = (DataSize + 1) / 2; 2534 } 2535 2536 return (VDataSize / 4) == DataSize + TFESize; 2537 } 2538 2539 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2540 2541 const unsigned Opc = Inst.getOpcode(); 2542 const MCInstrDesc &Desc = MII.get(Opc); 2543 2544 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2545 return true; 2546 if (!Desc.mayLoad() || !Desc.mayStore()) 2547 return true; // Not atomic 2548 2549 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2550 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2551 2552 // This is an incomplete check because image_atomic_cmpswap 2553 // may only use 0x3 and 0xf while other atomic operations 2554 // may use 0x1 and 0x3. However these limitations are 2555 // verified when we check that dmask matches dst size. 
2556 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2557 } 2558 2559 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2560 2561 const unsigned Opc = Inst.getOpcode(); 2562 const MCInstrDesc &Desc = MII.get(Opc); 2563 2564 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2565 return true; 2566 2567 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2568 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2569 2570 // GATHER4 instructions use dmask in a different fashion compared to 2571 // other MIMG instructions. The only useful DMASK values are 2572 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2573 // (red,red,red,red) etc.) The ISA document doesn't mention 2574 // this. 2575 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2576 } 2577 2578 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2579 2580 const unsigned Opc = Inst.getOpcode(); 2581 const MCInstrDesc &Desc = MII.get(Opc); 2582 2583 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2584 return true; 2585 2586 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2587 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2588 if (isCI() || isSI()) 2589 return false; 2590 } 2591 2592 return true; 2593 } 2594 2595 static bool IsRevOpcode(const unsigned Opcode) 2596 { 2597 switch (Opcode) { 2598 case AMDGPU::V_SUBREV_F32_e32: 2599 case AMDGPU::V_SUBREV_F32_e64: 2600 case AMDGPU::V_SUBREV_F32_e32_si: 2601 case AMDGPU::V_SUBREV_F32_e32_vi: 2602 case AMDGPU::V_SUBREV_F32_e64_si: 2603 case AMDGPU::V_SUBREV_F32_e64_vi: 2604 case AMDGPU::V_SUBREV_I32_e32: 2605 case AMDGPU::V_SUBREV_I32_e64: 2606 case AMDGPU::V_SUBREV_I32_e32_si: 2607 case AMDGPU::V_SUBREV_I32_e64_si: 2608 case AMDGPU::V_SUBBREV_U32_e32: 2609 case AMDGPU::V_SUBBREV_U32_e64: 2610 case AMDGPU::V_SUBBREV_U32_e32_si: 2611 case AMDGPU::V_SUBBREV_U32_e32_vi: 2612 case AMDGPU::V_SUBBREV_U32_e64_si: 2613 case AMDGPU::V_SUBBREV_U32_e64_vi: 2614 case 
AMDGPU::V_SUBREV_U32_e32: 2615 case AMDGPU::V_SUBREV_U32_e64: 2616 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2617 case AMDGPU::V_SUBREV_U32_e32_vi: 2618 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2619 case AMDGPU::V_SUBREV_U32_e64_vi: 2620 case AMDGPU::V_SUBREV_F16_e32: 2621 case AMDGPU::V_SUBREV_F16_e64: 2622 case AMDGPU::V_SUBREV_F16_e32_vi: 2623 case AMDGPU::V_SUBREV_F16_e64_vi: 2624 case AMDGPU::V_SUBREV_U16_e32: 2625 case AMDGPU::V_SUBREV_U16_e64: 2626 case AMDGPU::V_SUBREV_U16_e32_vi: 2627 case AMDGPU::V_SUBREV_U16_e64_vi: 2628 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2629 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2630 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2631 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2632 case AMDGPU::V_LSHLREV_B32_e32_si: 2633 case AMDGPU::V_LSHLREV_B32_e64_si: 2634 case AMDGPU::V_LSHLREV_B16_e32_vi: 2635 case AMDGPU::V_LSHLREV_B16_e64_vi: 2636 case AMDGPU::V_LSHLREV_B32_e32_vi: 2637 case AMDGPU::V_LSHLREV_B32_e64_vi: 2638 case AMDGPU::V_LSHLREV_B64_vi: 2639 case AMDGPU::V_LSHRREV_B32_e32_si: 2640 case AMDGPU::V_LSHRREV_B32_e64_si: 2641 case AMDGPU::V_LSHRREV_B16_e32_vi: 2642 case AMDGPU::V_LSHRREV_B16_e64_vi: 2643 case AMDGPU::V_LSHRREV_B32_e32_vi: 2644 case AMDGPU::V_LSHRREV_B32_e64_vi: 2645 case AMDGPU::V_LSHRREV_B64_vi: 2646 case AMDGPU::V_ASHRREV_I32_e64_si: 2647 case AMDGPU::V_ASHRREV_I32_e32_si: 2648 case AMDGPU::V_ASHRREV_I16_e32_vi: 2649 case AMDGPU::V_ASHRREV_I16_e64_vi: 2650 case AMDGPU::V_ASHRREV_I32_e32_vi: 2651 case AMDGPU::V_ASHRREV_I32_e64_vi: 2652 case AMDGPU::V_ASHRREV_I64_vi: 2653 case AMDGPU::V_PK_LSHLREV_B16_vi: 2654 case AMDGPU::V_PK_LSHRREV_B16_vi: 2655 case AMDGPU::V_PK_ASHRREV_I16_vi: 2656 return true; 2657 default: 2658 return false; 2659 } 2660 } 2661 2662 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2663 2664 using namespace SIInstrFlags; 2665 const unsigned Opcode = Inst.getOpcode(); 2666 const MCInstrDesc &Desc = MII.get(Opcode); 2667 2668 // lds_direct register is defined so that it can be used 2669 // with 
9-bit operands only. Ignore encodings which do not accept these. 2670 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2671 return true; 2672 2673 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2674 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2675 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2676 2677 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2678 2679 // lds_direct cannot be specified as either src1 or src2. 2680 for (int SrcIdx : SrcIndices) { 2681 if (SrcIdx == -1) break; 2682 const MCOperand &Src = Inst.getOperand(SrcIdx); 2683 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2684 return false; 2685 } 2686 } 2687 2688 if (Src0Idx == -1) 2689 return true; 2690 2691 const MCOperand &Src = Inst.getOperand(Src0Idx); 2692 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2693 return true; 2694 2695 // lds_direct is specified as src0. Check additional limitations. 
2696 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2697 } 2698 2699 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2700 unsigned Opcode = Inst.getOpcode(); 2701 const MCInstrDesc &Desc = MII.get(Opcode); 2702 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2703 return true; 2704 2705 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2706 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2707 2708 const int OpIndices[] = { Src0Idx, Src1Idx }; 2709 2710 unsigned NumLiterals = 0; 2711 uint32_t LiteralValue; 2712 2713 for (int OpIdx : OpIndices) { 2714 if (OpIdx == -1) break; 2715 2716 const MCOperand &MO = Inst.getOperand(OpIdx); 2717 if (MO.isImm() && 2718 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2719 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2720 !isInlineConstant(Inst, OpIdx)) { 2721 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2722 if (NumLiterals == 0 || LiteralValue != Value) { 2723 LiteralValue = Value; 2724 ++NumLiterals; 2725 } 2726 } 2727 } 2728 2729 return NumLiterals <= 1; 2730 } 2731 2732 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2733 const SMLoc &IDLoc) { 2734 if (!validateLdsDirect(Inst)) { 2735 Error(IDLoc, 2736 "invalid use of lds_direct"); 2737 return false; 2738 } 2739 if (!validateSOPLiteral(Inst)) { 2740 Error(IDLoc, 2741 "only one literal operand is allowed"); 2742 return false; 2743 } 2744 if (!validateConstantBusLimitations(Inst)) { 2745 Error(IDLoc, 2746 "invalid operand (violates constant bus restrictions)"); 2747 return false; 2748 } 2749 if (!validateEarlyClobberLimitations(Inst)) { 2750 Error(IDLoc, 2751 "destination must be different than all sources"); 2752 return false; 2753 } 2754 if (!validateIntClampSupported(Inst)) { 2755 Error(IDLoc, 2756 "integer clamping is not supported on this GPU"); 2757 return false; 2758 } 2759 // For MUBUF/MTBUF d16 is a part of 
opcode, so there is nothing to validate. 2760 if (!validateMIMGD16(Inst)) { 2761 Error(IDLoc, 2762 "d16 modifier is not supported on this GPU"); 2763 return false; 2764 } 2765 if (!validateMIMGDataSize(Inst)) { 2766 Error(IDLoc, 2767 "image data size does not match dmask and tfe"); 2768 return false; 2769 } 2770 if (!validateMIMGAtomicDMask(Inst)) { 2771 Error(IDLoc, 2772 "invalid atomic image dmask"); 2773 return false; 2774 } 2775 if (!validateMIMGGatherDMask(Inst)) { 2776 Error(IDLoc, 2777 "invalid image_gather dmask: only one bit must be set"); 2778 return false; 2779 } 2780 2781 return true; 2782 } 2783 2784 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 2785 const FeatureBitset &FBS, 2786 unsigned VariantID = 0); 2787 2788 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2789 OperandVector &Operands, 2790 MCStreamer &Out, 2791 uint64_t &ErrorInfo, 2792 bool MatchingInlineAsm) { 2793 MCInst Inst; 2794 unsigned Result = Match_Success; 2795 for (auto Variant : getMatchedVariants()) { 2796 uint64_t EI; 2797 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2798 Variant); 2799 // We order match statuses from least to most specific. 
We use most specific 2800 // status as resulting 2801 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2802 if ((R == Match_Success) || 2803 (R == Match_PreferE32) || 2804 (R == Match_MissingFeature && Result != Match_PreferE32) || 2805 (R == Match_InvalidOperand && Result != Match_MissingFeature 2806 && Result != Match_PreferE32) || 2807 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2808 && Result != Match_MissingFeature 2809 && Result != Match_PreferE32)) { 2810 Result = R; 2811 ErrorInfo = EI; 2812 } 2813 if (R == Match_Success) 2814 break; 2815 } 2816 2817 switch (Result) { 2818 default: break; 2819 case Match_Success: 2820 if (!validateInstruction(Inst, IDLoc)) { 2821 return true; 2822 } 2823 Inst.setLoc(IDLoc); 2824 Out.EmitInstruction(Inst, getSTI()); 2825 return false; 2826 2827 case Match_MissingFeature: 2828 return Error(IDLoc, "instruction not supported on this GPU"); 2829 2830 case Match_MnemonicFail: { 2831 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2832 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2833 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2834 return Error(IDLoc, "invalid instruction" + Suggestion, 2835 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2836 } 2837 2838 case Match_InvalidOperand: { 2839 SMLoc ErrorLoc = IDLoc; 2840 if (ErrorInfo != ~0ULL) { 2841 if (ErrorInfo >= Operands.size()) { 2842 return Error(IDLoc, "too few operands for instruction"); 2843 } 2844 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2845 if (ErrorLoc == SMLoc()) 2846 ErrorLoc = IDLoc; 2847 } 2848 return Error(ErrorLoc, "invalid operand for instruction"); 2849 } 2850 2851 case Match_PreferE32: 2852 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2853 "should be encoded as e32"); 2854 } 2855 llvm_unreachable("Implement any new match types added!"); 2856 } 2857 2858 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2859 
int64_t Tmp = -1; 2860 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2861 return true; 2862 } 2863 if (getParser().parseAbsoluteExpression(Tmp)) { 2864 return true; 2865 } 2866 Ret = static_cast<uint32_t>(Tmp); 2867 return false; 2868 } 2869 2870 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2871 uint32_t &Minor) { 2872 if (ParseAsAbsoluteExpression(Major)) 2873 return TokError("invalid major version"); 2874 2875 if (getLexer().isNot(AsmToken::Comma)) 2876 return TokError("minor version number required, comma expected"); 2877 Lex(); 2878 2879 if (ParseAsAbsoluteExpression(Minor)) 2880 return TokError("invalid minor version"); 2881 2882 return false; 2883 } 2884 2885 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2886 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2887 return TokError("directive only supported for amdgcn architecture"); 2888 2889 std::string Target; 2890 2891 SMLoc TargetStart = getTok().getLoc(); 2892 if (getParser().parseEscapedString(Target)) 2893 return true; 2894 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2895 2896 std::string ExpectedTarget; 2897 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2898 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2899 2900 if (Target != ExpectedTargetOS.str()) 2901 return getParser().Error(TargetRange.Start, "target must match options", 2902 TargetRange); 2903 2904 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2905 return false; 2906 } 2907 2908 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2909 return getParser().Error(Range.Start, "value out of range", Range); 2910 } 2911 2912 bool AMDGPUAsmParser::calculateGPRBlocks( 2913 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2914 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2915 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2916 unsigned &SGPRBlocks) { 2917 // TODO(scott.linder): These 
  // These GPR-block calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;
  unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI());

  // On gfx8+ without the SGPR-init bug, the user-visible SGPR count must fit
  // the addressable range before the extra (VCC/FLAT_SCR/XNACK) SGPRs are
  // added.
  if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  NumSGPRs +=
      IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

  // On gfx7 and older (or with the SGPR-init bug) the extra SGPRs count
  // against the addressable limit as well.
  if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
      NumSGPRs > MaxAddressableNumSGPRs)
    return OutOfRangeError(SGPRRange);

  if (Features.test(FeatureSGPRInitBug))
    NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;

  VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}

// Parse a .amdhsa_kernel block: reads .amdhsa_* sub-directives until
// .end_amdhsa_kernel and emits the resulting HSA kernel descriptor.
// Returns true on error (MCAsmParser convention).
bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
    return TokError("directive only supported for amdhsa OS");

  StringRef KernelName;
  if (getParser().parseIdentifier(KernelName))
    return true;

  kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();

  // Directives seen so far, used to reject repeats.
  StringSet<> Seen;

  IsaVersion IVersion = getIsaVersion(getSTI().getCPU());

  SMRange VGPRRange;
  uint64_t NextFreeVGPR = 0;
  SMRange SGPRRange;
  uint64_t NextFreeSGPR = 0;
  unsigned UserSGPRCount = 0;
  bool ReserveVCC = true;
  bool ReserveFlatScr = true;
  bool ReserveXNACK = hasXNACK();

  while (true) {
    // Skip statement separators (comments lex to EndOfStatement too).
    while (getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel");

    StringRef ID = getTok().getIdentifier();
    SMRange IDRange = getTok().getLocRange();
    Lex();

    if (ID == ".end_amdhsa_kernel")
      break;

    if (Seen.find(ID) != Seen.end())
      return TokError(".amdhsa_ directives cannot be repeated");
    Seen.insert(ID);

    SMLoc ValStart = getTok().getLoc();
    int64_t IVal;
    if (getParser().parseAbsoluteExpression(IVal))
      return true;
    SMLoc ValEnd = getTok().getLoc();
    SMRange ValRange = SMRange(ValStart, ValEnd);

    // All descriptor fields are unsigned; reject negative values up front.
    if (IVal < 0)
      return OutOfRangeError(ValRange);

    uint64_t Val = IVal;

// Range-check VALUE against the bit width of descriptor field ENTRY, then
// store it into FIELD.  RANGE is the source range used for diagnostics.
#define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
  if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
    return OutOfRangeError(RANGE);                                             \
  AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);

    if (ID == ".amdhsa_group_segment_fixed_size") {
      if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.group_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_private_segment_fixed_size") {
      if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
        return OutOfRangeError(ValRange);
      KD.private_segment_fixed_size = Val;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
                       Val, ValRange);
      // Each enabled user SGPR feature consumes a fixed number of SGPRs.
      UserSGPRCount += 4;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
                       Val, ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
                       ValRange);
      UserSGPRCount += 2;
    } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
      PARSE_BITS_ENTRY(KD.kernel_code_properties,
                       KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                       Val, ValRange);
      UserSGPRCount += 1;
    } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val,
          ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
                       ValRange);
    } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
                       ValRange);
    } else if (ID == ".amdhsa_next_free_vgpr") {
      // Recorded for the GPR-block computation after the loop.
      VGPRRange = ValRange;
      NextFreeVGPR = Val;
    } else if (ID == ".amdhsa_next_free_sgpr") {
      SGPRRange = ValRange;
      NextFreeSGPR = Val;
    } else if (ID == ".amdhsa_reserve_vcc") {
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveVCC = Val;
    } else if (ID == ".amdhsa_reserve_flat_scratch") {
      if (IVersion.Major < 7)
        return getParser().Error(IDRange.Start, "directive requires gfx7+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveFlatScr = Val;
    } else if (ID == ".amdhsa_reserve_xnack_mask") {
      if (IVersion.Major < 8)
        return getParser().Error(IDRange.Start, "directive requires gfx8+",
                                 IDRange);
      if (!isUInt<1>(Val))
        return OutOfRangeError(ValRange);
      ReserveXNACK = Val;
    } else if (ID == ".amdhsa_float_round_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_round_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_32") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
    } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
                       ValRange);
    } else if (ID == ".amdhsa_dx10_clamp") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
                       COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
    } else if (ID == ".amdhsa_ieee_mode") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_fp16_overflow") {
      if (IVersion.Major < 9)
        return getParser().Error(IDRange.Start, "directive requires gfx9+",
                                 IDRange);
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
                       ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_denorm_src") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
      PARSE_BITS_ENTRY(
          KD.compute_pgm_rsrc2,
          COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
          ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
                       Val, ValRange);
    } else if (ID == ".amdhsa_exception_int_div_zero") {
      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
                       COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
                       Val, ValRange);
    } else {
      return getParser().Error(IDRange.Start,
                               "unknown .amdhsa_kernel directive", IDRange);
    }

#undef PARSE_BITS_ENTRY
  }

  if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
    return TokError(".amdhsa_next_free_vgpr directive is required");

  if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
    return TokError(".amdhsa_next_free_sgpr directive is required");

  unsigned VGPRBlocks;
  unsigned SGPRBlocks;
  if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
                         ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR,
                         SGPRRange, VGPRBlocks, SGPRBlocks))
    return true;
  // Encode the computed VGPR/SGPR block counts into compute_pgm_rsrc1,
  // rejecting values that do not fit their fields.
  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
          VGPRBlocks))
    return OutOfRangeError(VGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
          SGPRBlocks))
    return OutOfRangeError(SGPRRange);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
                  SGPRBlocks);

  if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
    return TokError("too many user SGPRs enabled");
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
                  UserSGPRCount);

  getTargetStreamer().EmitAmdhsaKernelDescriptor(
      getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
      ReserveFlatScr, ReserveXNACK);
  return false;
}

// Parse .hsa_code_object_version <major>,<minor> and forward the version to
// the target streamer.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
  uint32_t Major;
  uint32_t Minor;

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
  return false;
}

// Parse .hsa_code_object_isa: either bare (use the targeted GPU's ISA
// version with vendor "AMD" / arch "AMDGPU") or with explicit
// major, minor, stepping, "vendor", "arch" operands.
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
  uint32_t Major;
  uint32_t Minor;
  uint32_t Stepping;
  StringRef VendorName;
  StringRef ArchName;

  // If this directive has no arguments, then use the ISA version for the
  // targeted GPU.
  if (getLexer().is(AsmToken::EndOfStatement)) {
    AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
    getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor,
                                                      ISA.Stepping,
                                                      "AMD", "AMDGPU");
    return false;
  }

  if (ParseDirectiveMajorMinor(Major, Minor))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("stepping version number required, comma expected");
  Lex();

  if (ParseAsAbsoluteExpression(Stepping))
    return TokError("invalid stepping version");

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("vendor name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid vendor name");

  VendorName = getLexer().getTok().getStringContents();
  Lex();

  if (getLexer().isNot(AsmToken::Comma))
    return TokError("arch name required, comma expected");
  Lex();

  if (getLexer().isNot(AsmToken::String))
    return TokError("invalid arch name");

  ArchName = getLexer().getTok().getStringContents();
  Lex();

  getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping,
                                                    VendorName, ArchName);
  return false;
}

// Parse one "<field> = <value>" entry of a .amd_kernel_code_t block into
// Header.  Returns true on error.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();
  return false;
}

// Parse a .amd_kernel_code_t ... .end_amd_kernel_code_t block and emit the
// resulting amd_kernel_code_t header.
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
  amd_kernel_code_t Header;
  AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());

  while (true) {
    // Lex EndOfStatement.  This is in a while loop, because lexing a comment
    // will set the current token to EndOfStatement.
    while(getLexer().is(AsmToken::EndOfStatement))
      Lex();

    if (getLexer().isNot(AsmToken::Identifier))
      return TokError("expected value identifier or .end_amd_kernel_code_t");

    StringRef ID = getLexer().getTok().getIdentifier();
    Lex();

    if (ID == ".end_amd_kernel_code_t")
      break;

    if (ParseAMDKernelCodeTValue(ID, Header))
      return true;
  }

  getTargetStreamer().EmitAMDKernelCodeT(Header);

  return false;
}

// Parse .amdgpu_hsa_kernel <symbol>: mark the symbol as an HSA kernel.
bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
  if (getLexer().isNot(AsmToken::Identifier))
    return TokError("expected symbol name");

  StringRef KernelName = Parser.getTok().getString();

  getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
                                           ELF::STT_AMDGPU_HSA_KERNEL);
  Lex();
  // Code object v3 does not use the KernelScope machinery.
  if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()))
    KernelScope.initialize(getContext());
  return false;
}

// Parse .amd_amdgpu_isa "<isa string>" and verify it matches the ISA string
// derived from the current subtarget.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  auto ISAVersionStringFromASM = getLexer().getTok().getStringContents();

  std::string ISAVersionStringFromSTI;
  raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI);
  IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI);

  if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) {
    return Error(getParser().getTok().getLoc(),
                 ".amd_amdgpu_isa directive does not match triple and/or mcpu "
                 "arguments specified through the command line");
  }

  getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str());
  Lex();

  return false;
}

// Parse an HSA metadata block (code object v2 or v3 flavor, selected by the
// subtarget) delimited by the matching begin/end directives, and hand the
// collected text to the target streamer.
bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
  const char *AssemblerDirectiveBegin;
  const char *AssemblerDirectiveEnd;
  std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
      AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())
          ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
                            HSAMD::V3::AssemblerDirectiveEnd)
          : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
                            HSAMD::AssemblerDirectiveEnd);

  if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
                 "not available on non-amdhsa OSes")).str());
  }

  std::string HSAMetadataString;
  if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
                          HSAMetadataString))
    return true;

  if (IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  } else {
    if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
      return Error(getParser().getTok().getLoc(), "invalid HSA metadata");
  }

  return false;
}

/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
3380 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3381 const char *AssemblerDirectiveEnd, 3382 std::string &CollectString) { 3383 3384 raw_string_ostream CollectStream(CollectString); 3385 3386 getLexer().setSkipSpace(false); 3387 3388 bool FoundEnd = false; 3389 while (!getLexer().is(AsmToken::Eof)) { 3390 while (getLexer().is(AsmToken::Space)) { 3391 CollectStream << getLexer().getTok().getString(); 3392 Lex(); 3393 } 3394 3395 if (getLexer().is(AsmToken::Identifier)) { 3396 StringRef ID = getLexer().getTok().getIdentifier(); 3397 if (ID == AssemblerDirectiveEnd) { 3398 Lex(); 3399 FoundEnd = true; 3400 break; 3401 } 3402 } 3403 3404 CollectStream << Parser.parseStringToEndOfStatement() 3405 << getContext().getAsmInfo()->getSeparatorString(); 3406 3407 Parser.eatToEndOfStatement(); 3408 } 3409 3410 getLexer().setSkipSpace(true); 3411 3412 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3413 return TokError(Twine("expected directive ") + 3414 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3415 } 3416 3417 CollectStream.flush(); 3418 return false; 3419 } 3420 3421 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3422 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3423 std::string String; 3424 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3425 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3426 return true; 3427 3428 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3429 if (!PALMetadata->setFromString(String)) 3430 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3431 return false; 3432 } 3433 3434 /// Parse the assembler directive for old linear-format PAL metadata. 
// Parse the old linear-format PAL metadata directive: a comma-separated list
// of register/value pairs.
bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
  if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
    return Error(getParser().getTok().getLoc(),
                 (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
                 "not available on non-amdpal OSes")).str());
  }

  auto PALMetadata = getTargetStreamer().getPALMetadata();
  PALMetadata->setLegacy();
  for (;;) {
    uint32_t Key, Value;
    if (ParseAsAbsoluteExpression(Key)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    // Keys and values must come in pairs.
    if (getLexer().isNot(AsmToken::Comma)) {
      return TokError(Twine("expected an even number of values in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    Lex();
    if (ParseAsAbsoluteExpression(Value)) {
      return TokError(Twine("invalid value in ") +
                      Twine(PALMD::AssemblerDirective));
    }
    PALMetadata->setRegister(Key, Value);
    if (getLexer().isNot(AsmToken::Comma))
      break;
    Lex();
  }
  return false;
}

// Top-level dispatcher for AMDGPU-specific assembler directives.  Returns
// true for anything not handled here so generic parsing can proceed.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // Code object v3 and pre-v3 accept disjoint directive sets.
  if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
    if (IDVal == ".amdgcn_target")
      return ParseDirectiveAMDGCNTarget();

    if (IDVal == ".amdhsa_kernel")
      return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  return true;
}

// Return true if RegNo is a register that exists on the current subtarget.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) const {

  // ttmp[12:15] (and any register aliasing it) is only available on gfx9.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9();
  }

  switch (RegNo) {
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    // TBA/TMA were removed on gfx9.
    return !isGFX9();
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return !isCI() && !isSI() && hasXNACK();
  default:
    break;
  }

  if (isInlineValue(RegNo))
    return !isCI() && !isSI() && !isVI();

  if (isCI())
    return true;

  if (isSI()) {
    // No flat_scr
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return false;
  }

  return true;
}

// Parse one instruction operand: generated custom parsers first, then
// register/immediate, then a general expression, finally a bare token
// (instruction flags such as 'gds').
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list. This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      getLexer().is(AsmToken::EndOfStatement))
    return ResTy;

  ResTy = parseRegOrImm(Operands);

  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail)
    return ResTy;

  const auto &Tok = Parser.getTok();
  SMLoc S = Tok.getLoc();

  const MCExpr *Expr = nullptr;
  if (!Parser.parseExpression(Expr)) {
    Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
    return MatchOperand_Success;
  }

  // Possibly this is an instruction flag like 'gds'.
  if (Tok.getKind() == AsmToken::Identifier) {
    Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S));
    Parser.Lex();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Strip a forced-encoding suffix (_e64/_e32/_dpp/_sdwa) from the mnemonic,
// recording which encoding was requested for the match step.
StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
  // Clear any forced encodings from the previous instruction.
  setForcedEncodingSize(0);
  setForcedDPP(false);
  setForcedSDWA(false);

  if (Name.endswith("_e64")) {
    setForcedEncodingSize(64);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_e32")) {
    setForcedEncodingSize(32);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_dpp")) {
    setForcedDPP(true);
    return Name.substr(0, Name.size() - 4);
  } else if (Name.endswith("_sdwa")) {
    setForcedSDWA(true);
    return Name.substr(0, Name.size() - 5);
  }
  return Name;
}

// Parse a full instruction: the (suffix-stripped) mnemonic followed by a
// comma-separated operand list up to end of statement.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  while (!getLexer().is(AsmToken::EndOfStatement)) {
    OperandMatchResultTy Res = parseOperand(Operands, Name);

    // Eat the comma or space if there is one.
    if (getLexer().is(AsmToken::Comma))
      Parser.Lex();

    switch (Res) {
    case MatchOperand_Success: break;
    case MatchOperand_ParseFail:
      Error(getLexer().getLoc(), "failed parsing operand.");
      // Skip to the end of the statement so parsing can resume afterwards.
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    case MatchOperand_NoMatch:
      Error(getLexer().getLoc(), "not a valid operand.");
      while (!getLexer().is(AsmToken::EndOfStatement)) {
        Parser.Lex();
      }
      return true;
    }
  }

  return false;
}

//===----------------------------------------------------------------------===//
// Utility functions
//===----------------------------------------------------------------------===//

// Parse "<Prefix>:<integer>" (with an optional leading minus) into Int.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Identifier: {
    StringRef Name = Parser.getTok().getString();
    if (!Name.equals(Prefix)) {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    Parser.Lex();

    // parseAbsoluteExpression does not handle the sign itself here, so
    // remember it and negate afterwards.
    bool IsMinus = false;
    if (getLexer().getKind() == AsmToken::Minus) {
      Parser.Lex();
      IsMinus = true;
    }

    if (getLexer().isNot(AsmToken::Integer))
      return MatchOperand_ParseFail;

    if (getParser().parseAbsoluteExpression(Int))
      return MatchOperand_ParseFail;

    if (IsMinus)
      Int = -Int;
    break;
  }
  }
  return MatchOperand_Success;
}

// Parse "<Prefix>:<integer>" and append it to Operands as an immediate of
// type ImmTy, optionally transforming the value with ConvertResult first.
OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                                    AMDGPUOperand::ImmTy ImmTy,
                                    bool (*ConvertResult)(int64_t&)) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Value = 0;

  OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
  if (Res != MatchOperand_Success)
    return Res;

  if (ConvertResult && !ConvertResult(Value)) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
  return MatchOperand_Success;
}

// Parse "<Prefix>:[<0|1>,...]" (up to four single-bit elements) into one
// packed immediate operand of type ImmTy.
OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix(
  const char *Prefix,
  OperandVector &Operands,
  AMDGPUOperand::ImmTy ImmTy,
  bool (*ConvertResult)(int64_t&)) {
  StringRef Name = Parser.getTok().getString();
  if (!Name.equals(Prefix))
    return MatchOperand_NoMatch;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon))
    return MatchOperand_ParseFail;

  Parser.Lex();
  if (getLexer().isNot(AsmToken::LBrac))
    return MatchOperand_ParseFail;
  Parser.Lex();

  unsigned Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; I < 4; ++I) {
    if (I != 0) {
      if (getLexer().is(AsmToken::RBrac))
        break;

      if (getLexer().isNot(AsmToken::Comma))
        return MatchOperand_ParseFail;
      Parser.Lex();
    }

    if (getLexer().isNot(AsmToken::Integer))
      return MatchOperand_ParseFail;

    int64_t Op;
    if (getParser().parseAbsoluteExpression(Op))
      return MatchOperand_ParseFail;

    // Each element is a single bit.
    if (Op != 0 && Op != 1)
      return MatchOperand_ParseFail;
    Val |= (Op << I);
  }

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}

// Parse a named single-bit flag operand: "Name" sets the bit, "no<...>Name"
// clears it; if absent (end of statement) a default 0 is appended.
OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
                               AMDGPUOperand::ImmTy ImmTy) {
  int64_t Bit = 0;
  SMLoc S = Parser.getTok().getLoc();

  // We are at the end of the statement, and this is a default argument, so
  // use a default value.
  if (getLexer().isNot(AsmToken::EndOfStatement)) {
    switch(getLexer().getKind()) {
    case AsmToken::Identifier: {
      StringRef Tok = Parser.getTok().getString();
      if (Tok == Name) {
        // r128/a16 are mutually exclusive per generation; report but keep
        // parsing.
        if (Tok == "r128" && isGFX9())
          Error(S, "r128 modifier is not supported on this GPU");
        if (Tok == "a16" && !isGFX9())
          Error(S, "a16 modifier is not supported on this GPU");
        Bit = 1;
        Parser.Lex();
      } else if (Tok.startswith("no") && Tok.endswith(Name)) {
        Bit = 0;
        Parser.Lex();
      } else {
        return MatchOperand_NoMatch;
      }
      break;
    }
    default:
      return MatchOperand_NoMatch;
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
  return MatchOperand_Success;
}

// Append the optional immediate recorded in OptionalIdx for ImmT to Inst, or
// Default if the source never provided one.
static void addOptionalImmOperand(
  MCInst& Inst, const OperandVector& Operands,
  AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
  AMDGPUOperand::ImmTy ImmT,
  int64_t Default = 0) {
  auto i = OptionalIdx.find(ImmT);
  if (i != OptionalIdx.end()) {
    unsigned Idx = i->second;
    ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
  } else {
    Inst.addOperand(MCOperand::createImm(Default));
  }
}

// Parse "<Prefix>:<identifier>" and return the identifier in Value.  Note
// the identifier token itself is not consumed here.
OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_NoMatch;
  }
  StringRef Tok = Parser.getTok().getString();
  if (Tok != Prefix) {
    return MatchOperand_NoMatch;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Colon)) {
    return MatchOperand_ParseFail;
  }

  Parser.Lex();
  if (getLexer().isNot(AsmToken::Identifier)) {
    return MatchOperand_ParseFail;
  }

  Value = Parser.getTok().getString();
  return MatchOperand_Success;
}

// dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
// values to live in a joint
format operand in the MCInst encoding. 3843 OperandMatchResultTy 3844 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3845 SMLoc S = Parser.getTok().getLoc(); 3846 int64_t Dfmt = 0, Nfmt = 0; 3847 // dfmt and nfmt can appear in either order, and each is optional. 3848 bool GotDfmt = false, GotNfmt = false; 3849 while (!GotDfmt || !GotNfmt) { 3850 if (!GotDfmt) { 3851 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3852 if (Res != MatchOperand_NoMatch) { 3853 if (Res != MatchOperand_Success) 3854 return Res; 3855 if (Dfmt >= 16) { 3856 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3857 return MatchOperand_ParseFail; 3858 } 3859 GotDfmt = true; 3860 Parser.Lex(); 3861 continue; 3862 } 3863 } 3864 if (!GotNfmt) { 3865 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3866 if (Res != MatchOperand_NoMatch) { 3867 if (Res != MatchOperand_Success) 3868 return Res; 3869 if (Nfmt >= 8) { 3870 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3871 return MatchOperand_ParseFail; 3872 } 3873 GotNfmt = true; 3874 Parser.Lex(); 3875 continue; 3876 } 3877 } 3878 break; 3879 } 3880 if (!GotDfmt && !GotNfmt) 3881 return MatchOperand_NoMatch; 3882 auto Format = Dfmt | Nfmt << 4; 3883 Operands.push_back( 3884 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3885 return MatchOperand_Success; 3886 } 3887 3888 //===----------------------------------------------------------------------===// 3889 // ds 3890 //===----------------------------------------------------------------------===// 3891 3892 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3893 const OperandVector &Operands) { 3894 OptionalImmIndexMap OptionalIdx; 3895 3896 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3897 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3898 3899 // Add the register arguments 3900 if (Op.isReg()) { 3901 Op.addRegOperands(Inst, 1); 3902 continue; 3903 } 3904 3905 // Handle optional arguments 3906 OptionalIdx[Op.getImmTy()] = i; 3907 } 3908 3909 
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3910 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3911 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3912 3913 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3914 } 3915 3916 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3917 bool IsGdsHardcoded) { 3918 OptionalImmIndexMap OptionalIdx; 3919 3920 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3921 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3922 3923 // Add the register arguments 3924 if (Op.isReg()) { 3925 Op.addRegOperands(Inst, 1); 3926 continue; 3927 } 3928 3929 if (Op.isToken() && Op.getToken() == "gds") { 3930 IsGdsHardcoded = true; 3931 continue; 3932 } 3933 3934 // Handle optional arguments 3935 OptionalIdx[Op.getImmTy()] = i; 3936 } 3937 3938 AMDGPUOperand::ImmTy OffsetType = 3939 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3940 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3941 AMDGPUOperand::ImmTyOffset; 3942 3943 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3944 3945 if (!IsGdsHardcoded) { 3946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3947 } 3948 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3949 } 3950 3951 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3952 OptionalImmIndexMap OptionalIdx; 3953 3954 unsigned OperandIdx[4]; 3955 unsigned EnMask = 0; 3956 int SrcIdx = 0; 3957 3958 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3959 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3960 3961 // Add the register arguments 3962 if (Op.isReg()) { 3963 assert(SrcIdx < 4); 3964 OperandIdx[SrcIdx] = Inst.size(); 3965 Op.addRegOperands(Inst, 1); 3966 ++SrcIdx; 3967 continue; 3968 } 3969 3970 if (Op.isOff()) { 3971 assert(SrcIdx < 4); 3972 OperandIdx[SrcIdx] = Inst.size(); 3973 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3974 ++SrcIdx; 3975 continue; 3976 } 3977 3978 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3979 Op.addImmOperands(Inst, 1); 3980 continue; 3981 } 3982 3983 if (Op.isToken() && Op.getToken() == "done") 3984 continue; 3985 3986 // Handle optional arguments 3987 OptionalIdx[Op.getImmTy()] = i; 3988 } 3989 3990 assert(SrcIdx == 4); 3991 3992 bool Compr = false; 3993 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 3994 Compr = true; 3995 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 3996 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 3997 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 3998 } 3999 4000 for (auto i = 0; i < SrcIdx; ++i) { 4001 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4002 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4003 } 4004 } 4005 4006 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4007 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4008 4009 Inst.addOperand(MCOperand::createImm(EnMask)); 4010 } 4011 4012 //===----------------------------------------------------------------------===// 4013 // s_waitcnt 4014 //===----------------------------------------------------------------------===// 4015 4016 static bool 4017 encodeCnt( 4018 const AMDGPU::IsaVersion ISA, 4019 int64_t &IntVal, 4020 int64_t CntVal, 4021 bool Saturate, 4022 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4023 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4024 { 4025 bool Failed = false; 4026 4027 IntVal = encode(ISA, IntVal, CntVal); 4028 if (CntVal != decode(ISA, IntVal)) { 4029 if (Saturate) { 4030 IntVal = encode(ISA, IntVal, -1); 4031 } else { 4032 Failed = true; 4033 } 4034 } 4035 return Failed; 4036 } 4037 4038 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4039 StringRef CntName = Parser.getTok().getString(); 4040 int64_t CntVal; 4041 4042 Parser.Lex(); 4043 if (getLexer().isNot(AsmToken::LParen)) 4044 return true; 4045 4046 Parser.Lex(); 4047 if (getLexer().isNot(AsmToken::Integer)) 4048 return true; 4049 4050 SMLoc ValLoc = Parser.getTok().getLoc(); 4051 if (getParser().parseAbsoluteExpression(CntVal)) 4052 return true; 4053 4054 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4055 4056 bool Failed = true; 4057 bool Sat = CntName.endswith("_sat"); 4058 4059 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4060 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4061 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4062 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4063 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4064 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, 
                       encodeLgkmcnt, decodeLgkmcnt);
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return true;
  }

  if (getLexer().isNot(AsmToken::RParen)) {
    return true;
  }

  Parser.Lex();
  // Multiple counter terms may be separated by '&' or ','; consume the
  // separator only when it is actually followed by another identifier.
  if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) {
    const AsmToken NextToken = getLexer().peekTok();
    if (NextToken.is(AsmToken::Identifier)) {
      Parser.Lex();
    }
  }

  return false;
}

// Parse the whole s_waitcnt operand: either a raw integer, or a sequence of
// named counter terms which are folded into the full bit mask (all counters
// start saturated via getWaitcntBitMask and are narrowed by each term).
OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
  int64_t Waitcnt = getWaitcntBitMask(ISA);
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_ParseFail;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Waitcnt))
      return MatchOperand_ParseFail;
    break;

  case AsmToken::Identifier:
    do {
      if (parseCnt(Waitcnt))
        return MatchOperand_ParseFail;
    } while(getLexer().isNot(AsmToken::EndOfStatement));
    break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
  return MatchOperand_Success;
}

// Parse the "hwreg(<id>[, <offset>, <width>])" construct. The register may be
// given symbolically (looked up in IdSymbolic; GFX9+ names are rejected on
// SI/CI/VI) or as an integer. Returns true on syntax failure; range checking
// of the parsed values is left to the caller (parseHwreg).
bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
                                          int64_t &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  if (Parser.getTok().getString() != "hwreg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    HwReg.IsSymbolic = true;
    HwReg.Id = ID_UNKNOWN_;
    const StringRef tok = Parser.getTok().getString();
    int Last = ID_SYMBOLIC_LAST_;
    // Names introduced with GFX9 are not valid on earlier subtargets.
    if (isSI() || isCI() || isVI())
      Last = ID_SYMBOLIC_FIRST_GFX9_;
    for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) {
      if (tok == IdSymbolic[i]) {
        HwReg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    HwReg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(HwReg.Id))
      return true;
  }

  // Short form: hwreg(<id>) with default offset/width.
  if (getLexer().is(AsmToken::RParen)) {
    Parser.Lex();
    return false;
  }

  // optional params
  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Offset))
    return true;

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::Integer))
    return true;
  if (getParser().parseAbsoluteExpression(Width))
    return true;

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();

  return false;
}

// Parse the s_setreg/s_getreg operand: either a raw 16-bit immediate or a
// hwreg(...) construct which is validated and packed into the immediate.
OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
  using namespace llvm::AMDGPU::Hwreg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default: return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;

  case AsmToken::Identifier: {
    OperandInfoTy HwReg(ID_UNKNOWN_);
    int64_t Offset = OFFSET_DEFAULT_;
    int64_t Width = WIDTH_M1_DEFAULT_ + 1;
    if (parseHwregConstruct(HwReg, Offset, Width))
      return MatchOperand_ParseFail;
    // Range-check each field; errors are reported but parsing proceeds so a
    // single bad operand does not cascade into follow-on errors.
    if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) {
      if (HwReg.IsSymbolic)
        Error(S, "invalid symbolic name of hardware register");
      else
        Error(S, "invalid code of hardware register: only 6-bit values are legal");
    }
    if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset))
      Error(S, "invalid bit offset: only 5-bit values are legal");
    if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1))
      Error(S, "invalid bitfield width: only values from 1 to 32 are legal");
    // Pack id/offset/(width-1) into the 16-bit simm16 encoding.
    Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_);
  }
  break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
  return MatchOperand_Success;
}

// Any immediate may serve as an s_waitcnt operand.
bool AMDGPUOperand::isSWaitCnt() const {
  return isImm();
}

bool AMDGPUOperand::isHwreg() const {
  return isImmTy(ImmTyHwreg);
}

// Parse "sendmsg(<msg>[, <op>[, <stream id>]])". Returns true on syntax
// failure. An unrecognized message id is reported via Msg.Id == ID_UNKNOWN_
// with a false return: the caller diagnoses it while the rest of the operand
// is left unparsed.
bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) {
  using namespace llvm::AMDGPU::SendMsg;

  if (Parser.getTok().getString() != "sendmsg")
    return true;
  Parser.Lex();

  if (getLexer().isNot(AsmToken::LParen))
    return true;
  Parser.Lex();

  if (getLexer().is(AsmToken::Identifier)) {
    Msg.IsSymbolic = true;
    Msg.Id = ID_UNKNOWN_;
    const std::string tok = Parser.getTok().getString();
    for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
      switch(i) {
      default: continue; // Omit gaps.
      case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break;
      }
      if (tok == IdSymbolic[i]) {
        Msg.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Msg.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Msg.Id))
      return true;
    if (getLexer().is(AsmToken::Integer))
      if (getParser().parseAbsoluteExpression(Msg.Id))
        Msg.Id = ID_UNKNOWN_;
  }
  if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest.
    return false;

  // Only GS, GS_DONE and SYSMSG carry an operation; others end here.
  if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) {
    if (getLexer().isNot(AsmToken::RParen))
      return true;
    Parser.Lex();
    return false;
  }

  if (getLexer().isNot(AsmToken::Comma))
    return true;
  Parser.Lex();

  assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG);
  Operation.Id = ID_UNKNOWN_;
  if (getLexer().is(AsmToken::Identifier)) {
    Operation.IsSymbolic = true;
    // The symbolic operation table depends on the message family.
    const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
    const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
    const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
    const StringRef Tok = Parser.getTok().getString();
    for (int i = F; i < L; ++i) {
      if (Tok == S[i]) {
        Operation.Id = i;
        break;
      }
    }
    Parser.Lex();
  } else {
    Operation.IsSymbolic = false;
    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(Operation.Id))
      return true;
  }

  if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
    // Stream id is optional.
    if (getLexer().is(AsmToken::RParen)) {
      Parser.Lex();
      return false;
    }

    if (getLexer().isNot(AsmToken::Comma))
      return true;
    Parser.Lex();

    if (getLexer().isNot(AsmToken::Integer))
      return true;
    if (getParser().parseAbsoluteExpression(StreamId))
      return true;
  }

  if (getLexer().isNot(AsmToken::RParen))
    return true;
  Parser.Lex();
  return false;
}

// Parse the interpolation slot operand of v_interp_* ("p10", "p20" or "p0")
// into its immediate encoding.
OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  int Slot = StringSwitch<int>(Str)
    .Case("p10", 0)
    .Case("p20", 1)
    .Case("p0", 2)
    .Default(-1);

  SMLoc S = Parser.getTok().getLoc();
  if (Slot == -1)
    return MatchOperand_ParseFail;

  Parser.Lex();
  Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
                                              AMDGPUOperand::ImmTyInterpSlot));
  return MatchOperand_Success;
}

// Parse an interpolation attribute of the form "attr<N>.<chan>" where chan is
// one of x/y/z/w and N is a decimal attribute number (0-63). Produces two imm
// operands: the attribute index and the channel.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  if (getLexer().getKind() != AsmToken::Identifier)
    return MatchOperand_NoMatch;

  StringRef Str = Parser.getTok().getString();
  if (!Str.startswith("attr"))
    return MatchOperand_NoMatch;

  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1)
    return MatchOperand_ParseFail;

  // Strip the "attr" prefix and ".<chan>" suffix, leaving the number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr))
    return MatchOperand_ParseFail;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();
  if (Attr > 63) {
    Error(S, "out of bounds attr");
    return MatchOperand_Success;
  }

  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}

// Report an out-of-range or explicitly invalid EXP target at the current
// token location.
void AMDGPUAsmParser::errorExpTgt() {
  Error(Parser.getTok().getLoc(), "invalid exp target");
}

// Decode an EXP target name into its numeric encoding:
//   null -> 9, mrtz -> 8, mrt0-7 -> 0-7, pos0-3 -> 12-15,
//   param0-31 -> 32-63, invalid_target_<n> -> diagnosed.
// Out-of-range values are diagnosed via errorExpTgt() but still return
// Success so parsing can continue.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
                                                      uint8_t &Val) {
  if (Str == "null") {
    Val = 9;
    return MatchOperand_Success;
  }

  if (Str.startswith("mrt")) {
    Str = Str.drop_front(3);
    if (Str == "z") { // == mrtz
      Val = 8;
      return MatchOperand_Success;
    }

    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 7)
      errorExpTgt();

    return MatchOperand_Success;
  }

  if (Str.startswith("pos")) {
    Str = Str.drop_front(3);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val > 3)
      errorExpTgt();

    Val += 12;
    return MatchOperand_Success;
  }

  if (Str.startswith("param")) {
    Str = Str.drop_front(5);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    if (Val >= 32)
      errorExpTgt();

    Val += 32;
    return MatchOperand_Success;
  }

  if (Str.startswith("invalid_target_")) {
    Str = Str.drop_front(15);
    if (Str.getAsInteger(10, Val))
      return MatchOperand_ParseFail;

    errorExpTgt();
    return MatchOperand_Success;
  }

  return MatchOperand_NoMatch;
}

// Parse the current token as an EXP target and push the encoded immediate.
OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
  uint8_t Val;
  StringRef Str = Parser.getTok().getString();

  auto Res = parseExpTgtImpl(Str, Val);
  if (Res != MatchOperand_Success)
    return Res;

  SMLoc S = Parser.getTok().getLoc();
  Parser.Lex();

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
                                              AMDGPUOperand::ImmTyExpTgt));
  return MatchOperand_Success;
}

// Parse the s_sendmsg operand: a raw 16-bit immediate or a sendmsg(...)
// construct. Field values are validated and packed into the simm16 encoding;
// validation errors are reported but an immediate operand is created anyway
// to avoid cascading diagnostics.
OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SendMsg;

  int64_t Imm16Val = 0;
  SMLoc S = Parser.getTok().getLoc();

  switch(getLexer().getKind()) {
  default:
    return MatchOperand_NoMatch;
  case AsmToken::Integer:
    // The operand can be an integer value.
    if (getParser().parseAbsoluteExpression(Imm16Val))
      return MatchOperand_NoMatch;
    if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) {
      Error(S, "invalid immediate: only 16-bit values are legal");
      // Do not return error code, but create an imm operand anyway and proceed
      // to the next operand, if any. That avoids unnecessary error messages.
    }
    break;
  case AsmToken::Identifier: {
    OperandInfoTy Msg(ID_UNKNOWN_);
    OperandInfoTy Operation(OP_UNKNOWN_);
    int64_t StreamId = STREAM_ID_DEFAULT_;
    if (parseSendMsgConstruct(Msg, Operation, StreamId))
      return MatchOperand_ParseFail;
    do {
      // Validate and encode message ID.
      if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE)
          || Msg.Id == ID_SYSMSG)) {
        if (Msg.IsSymbolic)
          Error(S, "invalid/unsupported symbolic name of message");
        else
          Error(S, "invalid/unsupported code of message");
        break;
      }
      Imm16Val = (Msg.Id << ID_SHIFT_);
      // Validate and encode operation ID.
      if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) {
        if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid symbolic name of GS_OP");
          else
            Error(S, "invalid code of GS_OP: only 2-bit values are legal");
          break;
        }
        if (Operation.Id == OP_GS_NOP
            && Msg.Id != ID_GS_DONE) {
          Error(S, "invalid GS_OP: NOP is for GS_DONE only");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      if (Msg.Id == ID_SYSMSG) {
        if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) {
          if (Operation.IsSymbolic)
            Error(S, "invalid/unsupported symbolic name of SYSMSG_OP");
          else
            Error(S, "invalid/unsupported code of SYSMSG_OP");
          break;
        }
        Imm16Val |= (Operation.Id << OP_SHIFT_);
      }
      // Validate and encode stream ID.
      if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) {
        if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) {
          Error(S, "invalid stream id: only 2-bit values are legal");
          break;
        }
        Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
      }
    } while (false);
  }
  break;
  }
  Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isSendMsg() const {
  return isImmTy(ImmTySendMsg);
}

//===----------------------------------------------------------------------===//
// parser helpers
//===----------------------------------------------------------------------===//

// True when Token is the identifier Id.
bool
AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
  return Token.is(AsmToken::Identifier) && Token.getString() == Id;
}

// True when the current token is the identifier Id.
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  return isId(getToken(), Id);
}

// True when the current token has the given kind.
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  return getTokenKind() ==
Kind;
}

// Consume the current token if it is the identifier Id; report success.
bool
AMDGPUAsmParser::trySkipId(const StringRef Id) {
  if (isId(Id)) {
    lex();
    return true;
  }
  return false;
}

// Consume the current token if it has the given kind; report success.
bool
AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
  if (isToken(Kind)) {
    lex();
    return true;
  }
  return false;
}

// Like trySkipToken, but emit ErrMsg at the current location on mismatch.
bool
AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
                           const StringRef ErrMsg) {
  if (!trySkipToken(Kind)) {
    Error(getLoc(), ErrMsg);
    return false;
  }
  return true;
}

// Parse an absolute expression into Imm; returns true on success
// (note: inverts the parser's error-return convention).
bool
AMDGPUAsmParser::parseExpr(int64_t &Imm) {
  return !getParser().parseAbsoluteExpression(Imm);
}

// Consume a quoted string token into Val; emit ErrMsg on mismatch.
bool
AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
  if (isToken(AsmToken::String)) {
    Val = getToken().getStringContents();
    lex();
    return true;
  } else {
    Error(getLoc(), ErrMsg);
    return false;
  }
}

AsmToken
AMDGPUAsmParser::getToken() const {
  return Parser.getTok();
}

AsmToken
AMDGPUAsmParser::peekToken() {
  return getLexer().peekTok();
}

// Fill Tokens with as much lookahead as the lexer provides; any remaining
// slots are padded with Error tokens so callers can test them safely.
void
AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
  auto TokCount = getLexer().peekTokens(Tokens);

  for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
    Tokens[Idx] = AsmToken(AsmToken::Error, "");
}

AsmToken::TokenKind
AMDGPUAsmParser::getTokenKind() const {
  return getLexer().getKind();
}

SMLoc
AMDGPUAsmParser::getLoc() const {
  return getToken().getLoc();
}

StringRef
AMDGPUAsmParser::getTokenStr() const {
  return getToken().getString();
}

void
AMDGPUAsmParser::lex() {
  Parser.Lex();
}

//===----------------------------------------------------------------------===//
// swizzle
//===----------------------------------------------------------------------===//

// Pack the and/or/xor masks into the BITMASK_PERM swizzle encoding.
LLVM_READNONE
static unsigned
encodeBitmaskPerm(const unsigned AndMask,
                  const unsigned OrMask,
                  const unsigned XorMask) {
  using namespace llvm::AMDGPU::Swizzle;

  return BITMASK_PERM_ENC |
         (AndMask << BITMASK_AND_SHIFT) |
         (OrMask << BITMASK_OR_SHIFT) |
         (XorMask << BITMASK_XOR_SHIFT);
}

// Parse OpNum comma-prefixed integer operands into Op, checking that each is
// in [MinVal, MaxVal]; emit ErrMsg at the offending operand otherwise.
bool
AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                                      const unsigned MinVal,
                                      const unsigned MaxVal,
                                      const StringRef ErrMsg) {
  for (unsigned i = 0; i < OpNum; ++i) {
    if (!skipToken(AsmToken::Comma, "expected a comma")){
      return false;
    }
    SMLoc ExprLoc = Parser.getTok().getLoc();
    if (!parseExpr(Op[i])) {
      return false;
    }
    if (Op[i] < MinVal || Op[i] > MaxVal) {
      Error(ExprLoc, ErrMsg);
      return false;
    }
  }

  return true;
}

// swizzle(QUAD_PERM, a, b, c, d): four 2-bit lane selectors packed into Imm.
bool
AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  int64_t Lane[LANE_NUM];
  if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
                           "expected a 2-bit lane id")) {
    Imm = QUAD_PERM_ENC;
    for (unsigned I = 0; I < LANE_NUM; ++I) {
      Imm |= Lane[I] << (LANE_SHIFT * I);
    }
    return true;
  }
  return false;
}

// swizzle(BROADCAST, group size, lane id): expressed as a bitmask perm whose
// and-mask keeps the bits above the group and whose or-mask selects the lane.
bool
AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;
  int64_t LaneIdx;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32,
                            "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }
  if (parseSwizzleOperands(1, &LaneIdx,
                           0, GroupSize - 1,
                           "lane id must be in the interval [0,group size - 1]")) {
    Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
    return true;
  }
  return false;
}

// swizzle(REVERSE, group size): xor-mask of (group size - 1) reverses lanes
// within each group.
bool
AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            2, 32, "group size must be in the interval [2,32]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
  return true;
}

// swizzle(SWAP, group size): xor-mask of the group size swaps adjacent groups.
bool
AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  SMLoc S = Parser.getTok().getLoc();
  int64_t GroupSize;

  if (!parseSwizzleOperands(1, &GroupSize,
                            1, 16, "group size must be in the interval [1,16]")) {
    return false;
  }
  if (!isPowerOf2_64(GroupSize)) {
    Error(S, "group size must be a power of two");
    return false;
  }

  Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
  return true;
}

// swizzle(BITMASK_PERM, "mask"): a 5-character string over {0,1,p,i} where,
// per bit position, '0'/'1' force the bit, 'p' passes it through and 'i'
// inverts it. The string's first character is the most significant bit.
bool
AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (!skipToken(AsmToken::Comma, "expected a comma")) {
    return false;
  }

  StringRef Ctl;
  SMLoc StrLoc = Parser.getTok().getLoc();
  if (!parseString(Ctl)) {
    return false;
  }
  if (Ctl.size() != BITMASK_WIDTH) {
    Error(StrLoc, "expected a 5-character mask");
    return false;
  }

  unsigned AndMask = 0;
  unsigned OrMask = 0;
  unsigned XorMask = 0;

  for (size_t i = 0; i < Ctl.size(); ++i) {
    unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
    switch(Ctl[i]) {
    default:
      Error(StrLoc, "invalid mask");
      return false;
    case '0':
      break;
    case '1':
      OrMask |= Mask;
      break;
    case 'p':
      AndMask |= Mask;
      break;
    case 'i':
      AndMask |= Mask;
      XorMask |= Mask;
      break;
    }
  }

  Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
  return true;
}

// Parse a raw 16-bit swizzle offset value.
bool
AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {

  SMLoc OffsetLoc = Parser.getTok().getLoc();

  if (!parseExpr(Imm)) {
    return false;
  }
  if (!isUInt<16>(Imm)) {
    Error(OffsetLoc, "expected a 16-bit offset");
    return false;
  }
  return true;
}

// Parse "swizzle(<mode>, ...)" after the "swizzle" identifier has been
// consumed, dispatching on the mode name to the matching sub-parser.
bool
AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
  using namespace llvm::AMDGPU::Swizzle;

  if (skipToken(AsmToken::LParen, "expected a left parentheses")) {

    SMLoc ModeLoc = Parser.getTok().getLoc();
    bool Ok = false;

    if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
      Ok = parseSwizzleQuadPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
      Ok = parseSwizzleBitmaskPerm(Imm);
    } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
      Ok = parseSwizzleBroadcast(Imm);
    } else if (trySkipId(IdSymbolic[ID_SWAP])) {
      Ok = parseSwizzleSwap(Imm);
    } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
      Ok = parseSwizzleReverse(Imm);
    } else {
      Error(ModeLoc, "expected a swizzle mode");
    }

    return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
  }

  return false;
}

// Parse the ds_swizzle_b32 "offset:" operand, which is either a raw 16-bit
// value or the swizzle(...) macro form.
OperandMatchResultTy
AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (trySkipId("offset")) {

    bool Ok = false;
    if (skipToken(AsmToken::Colon, "expected a colon")) {
      if (trySkipId("swizzle")) {
        Ok = parseSwizzleMacro(Imm);
      } else {
        Ok = parseSwizzleOffset(Imm);
      }
    }

    Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));

    return Ok? MatchOperand_Success : MatchOperand_ParseFail;
  } else {
    // Swizzle "offset" operand is optional.
    // If it is omitted, try parsing other optional operands.
    return parseOptionalOpr(Operands);
  }
}

bool
AMDGPUOperand::isSwizzle() const {
  return isImmTy(ImmTySwizzle);
}

//===----------------------------------------------------------------------===//
// VGPR Index Mode
//===----------------------------------------------------------------------===//

// Parse the body of "gpr_idx(...)" after the opening parenthesis: a comma
// separated list of distinct mode names OR-ed into a bit mask; an immediately
// closing parenthesis yields OFF. Errors are reported in place and the mask
// accumulated so far is returned.
int64_t AMDGPUAsmParser::parseGPRIdxMacro() {

  using namespace llvm::AMDGPU::VGPRIndexMode;

  if (trySkipToken(AsmToken::RParen)) {
    return OFF;
  }

  int64_t Imm = 0;

  while (true) {
    unsigned Mode = 0;
    SMLoc S = Parser.getTok().getLoc();

    for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
      if (trySkipId(IdSymbolic[ModeId])) {
        Mode = 1 << ModeId;
        break;
      }
    }

    if (Mode == 0) {
      Error(S, (Imm == 0)?
               "expected a VGPR index mode or a closing parenthesis" :
               "expected a VGPR index mode");
      break;
    }

    if (Imm & Mode) {
      Error(S, "duplicate VGPR index mode");
      break;
    }
    Imm |= Mode;

    if (trySkipToken(AsmToken::RParen))
      break;
    if (!skipToken(AsmToken::Comma,
                   "expected a comma or a closing parenthesis"))
      break;
  }

  return Imm;
}

// Parse the s_set_gpr_idx_on mode operand: either the gpr_idx(...) macro or
// a raw 4-bit immediate.
OperandMatchResultTy
AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {

  int64_t Imm = 0;
  SMLoc S = Parser.getTok().getLoc();

  if (getLexer().getKind() == AsmToken::Identifier &&
      Parser.getTok().getString() == "gpr_idx" &&
      getLexer().peekTok().is(AsmToken::LParen)) {

    Parser.Lex();
    Parser.Lex();

    // If parse failed, trigger an error but do not return error code
    // to avoid excessive error messages.
4955 Imm = parseGPRIdxMacro(); 4956 4957 } else { 4958 if (getParser().parseAbsoluteExpression(Imm)) 4959 return MatchOperand_NoMatch; 4960 if (Imm < 0 || !isUInt<4>(Imm)) { 4961 Error(S, "invalid immediate: only 4-bit values are legal"); 4962 } 4963 } 4964 4965 Operands.push_back( 4966 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 4967 return MatchOperand_Success; 4968 } 4969 4970 bool AMDGPUOperand::isGPRIdxMode() const { 4971 return isImmTy(ImmTyGprIdxMode); 4972 } 4973 4974 //===----------------------------------------------------------------------===// 4975 // sopp branch targets 4976 //===----------------------------------------------------------------------===// 4977 4978 OperandMatchResultTy 4979 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4980 SMLoc S = Parser.getTok().getLoc(); 4981 4982 switch (getLexer().getKind()) { 4983 default: return MatchOperand_ParseFail; 4984 case AsmToken::Integer: { 4985 int64_t Imm; 4986 if (getParser().parseAbsoluteExpression(Imm)) 4987 return MatchOperand_ParseFail; 4988 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 4989 return MatchOperand_Success; 4990 } 4991 4992 case AsmToken::Identifier: 4993 Operands.push_back(AMDGPUOperand::CreateExpr(this, 4994 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 4995 Parser.getTok().getString()), getContext()), S)); 4996 Parser.Lex(); 4997 return MatchOperand_Success; 4998 } 4999 } 5000 5001 //===----------------------------------------------------------------------===// 5002 // mubuf 5003 //===----------------------------------------------------------------------===// 5004 5005 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5006 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5007 } 5008 5009 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5010 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5011 } 5012 5013 void 
AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5014 const OperandVector &Operands, 5015 bool IsAtomic, 5016 bool IsAtomicReturn, 5017 bool IsLds) { 5018 bool IsLdsOpcode = IsLds; 5019 bool HasLdsModifier = false; 5020 OptionalImmIndexMap OptionalIdx; 5021 assert(IsAtomicReturn ? IsAtomic : true); 5022 unsigned FirstOperandIdx = 1; 5023 5024 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5025 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5026 5027 // Add the register arguments 5028 if (Op.isReg()) { 5029 Op.addRegOperands(Inst, 1); 5030 // Insert a tied src for atomic return dst. 5031 // This cannot be postponed as subsequent calls to 5032 // addImmOperands rely on correct number of MC operands. 5033 if (IsAtomicReturn && i == FirstOperandIdx) 5034 Op.addRegOperands(Inst, 1); 5035 continue; 5036 } 5037 5038 // Handle the case where soffset is an immediate 5039 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5040 Op.addImmOperands(Inst, 1); 5041 continue; 5042 } 5043 5044 HasLdsModifier = Op.isLDS(); 5045 5046 // Handle tokens like 'offen' which are sometimes hard-coded into the 5047 // asm string. There are no MCInst operands for these. 5048 if (Op.isToken()) { 5049 continue; 5050 } 5051 assert(Op.isImm()); 5052 5053 // Handle optional arguments 5054 OptionalIdx[Op.getImmTy()] = i; 5055 } 5056 5057 // This is a workaround for an llvm quirk which may result in an 5058 // incorrect instruction selection. Lds and non-lds versions of 5059 // MUBUF instructions are identical except that lds versions 5060 // have mandatory 'lds' modifier. However this modifier follows 5061 // optional modifiers and llvm asm matcher regards this 'lds' 5062 // modifier as an optional one. As a result, an lds version 5063 // of opcode may be selected even if it has no 'lds' modifier. 
5064 if (IsLdsOpcode && !HasLdsModifier) { 5065 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5066 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5067 Inst.setOpcode(NoLdsOpcode); 5068 IsLdsOpcode = false; 5069 } 5070 } 5071 5072 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5073 if (!IsAtomic) { // glc is hard-coded. 5074 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5075 } 5076 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5077 5078 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5079 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5080 } 5081 } 5082 5083 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5084 OptionalImmIndexMap OptionalIdx; 5085 5086 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5087 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5088 5089 // Add the register arguments 5090 if (Op.isReg()) { 5091 Op.addRegOperands(Inst, 1); 5092 continue; 5093 } 5094 5095 // Handle the case where soffset is an immediate 5096 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5097 Op.addImmOperands(Inst, 1); 5098 continue; 5099 } 5100 5101 // Handle tokens like 'offen' which are sometimes hard-coded into the 5102 // asm string. There are no MCInst operands for these. 
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
}

//===----------------------------------------------------------------------===//
// mimg
//===----------------------------------------------------------------------===//

// Convert parsed MIMG operands into MCInst operands. Defs come first; for
// atomics the single def is repeated as a tied source. Remaining operands are
// registers (added in order) or typed immediate modifiers, appended in
// canonical order afterwards.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unexpected operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}

void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
  cvtMIMG(Inst, Operands, true);
}

//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isSMRDOffset8() const {
  return isImm() && isUInt<8>(getImm());
}

bool AMDGPUOperand::isSMRDOffset20() const {
  return isImm() && isUInt<20>(getImm());
}

bool AMDGPUOperand::isSMRDLiteralOffset() const {
  // 32-bit literals are only supported on CI and we only want to use them
  // when the offset is > 8-bits.
  return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}

//===----------------------------------------------------------------------===//
// vop3
//===----------------------------------------------------------------------===//

// Map the omod multiplier written in asm (1, 2 or 4) to its field encoding.
static bool ConvertOmodMul(int64_t &Mul) {
  if (Mul != 1 && Mul != 2 && Mul != 4)
    return false;

  Mul >>= 1;
  return true;
}

// Map the omod divisor written in asm (1 or 2) to its field encoding.
static bool ConvertOmodDiv(int64_t &Div) {
  if (Div == 1) {
    Div = 0;
    return true;
  }

  if (Div == 2) {
    Div = 3;
    return true;
  }

  return false;
}

// Map the DPP bound_ctrl value written in asm (0 or -1) to its field encoding.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl == 0) {
    BoundCtrl = 1;
    return true;
  }

  if (BoundCtrl == -1) {
    BoundCtrl = 0;
    return true;
  }

  return false;
}

// Note: the order in this table matches the order of operands in AsmString.
// Table of all optional operands: assembly name, immediate type, whether the
// operand is a bare named bit (true) or takes a "name:value" payload (false),
// and an optional value-conversion callback applied after parsing.
// parseOptionalOpr scans this table top-down and stops at the first match,
// so entry order matters. NOTE(review): "d16" appears twice; the second
// entry is unreachable under first-match-wins — presumably kept so the table
// order mirrors the AsmString operand order (see note above).
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",       AMDGPUOperand::ImmTyOffen,         true,  nullptr},
  {"idxen",       AMDGPUOperand::ImmTyIdxen,         true,  nullptr},
  {"addr64",      AMDGPUOperand::ImmTyAddr64,        true,  nullptr},
  {"offset0",     AMDGPUOperand::ImmTyOffset0,       false, nullptr},
  {"offset1",     AMDGPUOperand::ImmTyOffset1,       false, nullptr},
  {"gds",         AMDGPUOperand::ImmTyGDS,           true,  nullptr},
  {"lds",         AMDGPUOperand::ImmTyLDS,           true,  nullptr},
  {"offset",      AMDGPUOperand::ImmTyOffset,        false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset,    false, nullptr},
  {"dfmt",        AMDGPUOperand::ImmTyFORMAT,        false, nullptr},
  {"glc",         AMDGPUOperand::ImmTyGLC,           true,  nullptr},
  {"slc",         AMDGPUOperand::ImmTySLC,           true,  nullptr},
  {"tfe",         AMDGPUOperand::ImmTyTFE,           true,  nullptr},
  {"d16",         AMDGPUOperand::ImmTyD16,           true,  nullptr},
  {"high",        AMDGPUOperand::ImmTyHigh,          true,  nullptr},
  {"clamp",       AMDGPUOperand::ImmTyClampSI,       true,  nullptr},
  {"omod",        AMDGPUOperand::ImmTyOModSI,        false, ConvertOmodMul},
  {"unorm",       AMDGPUOperand::ImmTyUNorm,         true,  nullptr},
  {"da",          AMDGPUOperand::ImmTyDA,            true,  nullptr},
  // "r128" and "a16" are aliases for the same encoded bit.
  {"r128",        AMDGPUOperand::ImmTyR128A16,       true,  nullptr},
  {"a16",         AMDGPUOperand::ImmTyR128A16,       true,  nullptr},
  {"lwe",         AMDGPUOperand::ImmTyLWE,           true,  nullptr},
  {"d16",         AMDGPUOperand::ImmTyD16,           true,  nullptr},
  {"dmask",       AMDGPUOperand::ImmTyDMask,         false, nullptr},
  {"row_mask",    AMDGPUOperand::ImmTyDppRowMask,    false, nullptr},
  {"bank_mask",   AMDGPUOperand::ImmTyDppBankMask,   false, nullptr},
  {"bound_ctrl",  AMDGPUOperand::ImmTyDppBoundCtrl,  false, ConvertBoundCtrl},
  {"dst_sel",     AMDGPUOperand::ImmTySdwaDstSel,    false, nullptr},
  {"src0_sel",    AMDGPUOperand::ImmTySdwaSrc0Sel,   false, nullptr},
  {"src1_sel",    AMDGPUOperand::ImmTySdwaSrc1Sel,   false, nullptr},
  {"dst_unused",  AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr",       AMDGPUOperand::ImmTyExpCompr,      true,  nullptr},
  {"vm",          AMDGPUOperand::ImmTyExpVM,         true,  nullptr},
  {"op_sel",      AMDGPUOperand::ImmTyOpSel,         false, nullptr},
  {"op_sel_hi",   AMDGPUOperand::ImmTyOpSelHi,       false, nullptr},
  {"neg_lo",      AMDGPUOperand::ImmTyNegLo,         false, nullptr},
  {"neg_hi",      AMDGPUOperand::ImmTyNegHi,         false, nullptr}
};

/// Entry point for parsing an optional operand, with lookahead to skip over
/// hardcoded mandatory operands that follow optional ones (see comment below).
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
  unsigned size = Operands.size();
  assert(size > 0);

  OperandMatchResultTy res = parseOptionalOpr(Operands);

  // This is a hack to enable hardcoded mandatory operands which follow
  // optional operands.
  //
  // Current design assumes that all operands after the first optional operand
  // are also optional. However implementation of some instructions violates
  // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
  //
  // To alleviate this problem, we have to (implicitly) parse extra operands
  // to make sure autogenerated parser of custom operands never hit hardcoded
  // mandatory operands.

  if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) {

    // We have parsed the first optional operand.
    // Parse as many operands as necessary to skip all mandatory operands.

    for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
      if (res != MatchOperand_Success ||
          getLexer().is(AsmToken::EndOfStatement)) break;
      if (getLexer().is(AsmToken::Comma)) Parser.Lex();
      res = parseOptionalOpr(Operands);
    }
  }

  return res;
}

/// Try each entry of AMDGPUOptionalOperandTable against the current token
/// stream, dispatching to the specialized parser for each operand family.
/// Returns the first result that is not MatchOperand_NoMatch.
OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
  OperandMatchResultTy res;
  for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
    // try to parse any optional operand here
    if (Op.IsBit) {
      res = parseNamedBit(Op.Name, Operands, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
      res = parseOModOperand(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
               Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
      res = parseSDWASel(Operands, Op.Name, Op.Type);
    } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
      res = parseSDWADstUnused(Operands);
    } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
               Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
               Op.Type == AMDGPUOperand::ImmTyNegLo ||
               Op.Type == AMDGPUOperand::ImmTyNegHi) {
      res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
                                        Op.ConvertResult);
    } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) {
      res = parseDfmtNfmt(Operands);
    } else {
      res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
    }
    if (res != MatchOperand_NoMatch) {
      return res;
    }
  }
  return MatchOperand_NoMatch;
}

/// Parse the output-modifier operand, which is spelled either "mul:N" or
/// "div:N"; both map onto the single ImmTyOModSI immediate with different
/// value conversions.
OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
  StringRef Name = Parser.getTok().getString();
  if (Name == "mul") {
    return parseIntWithPrefix("mul", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
  }

  if (Name == "div") {
    return parseIntWithPrefix("div", Operands,
                              AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
  }

  return MatchOperand_NoMatch;
}

/// VOP3 conversion for instructions using op_sel on the destination: after
/// the regular VOP3P conversion, the op_sel bit just past the last source
/// (bit index SrcNum) selects the destination half and is folded into
/// src0_modifiers as DST_OP_SEL.
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many named src operands this opcode actually has (1..3).
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}

/// Returns true if operand OpNum of Desc is a source-modifiers operand that
/// is immediately followed by the (untied) register operand it modifies.
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
      // 1. This operand is input modifiers
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. This is not last operand
      && Desc.NumOperands > (OpNum + 1)
      // 3. Next operand is register class
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. Next register is not tied to any other operand
      && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
}

/// Convert parsed VOP3 interpolation (v_interp_*) operands. Interp slot,
/// attribute, and attribute-channel operands become plain immediates; the
/// optional high/clamp/omod immediates are appended only when the opcode
/// actually defines those named operands.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // isRegOrImmWithInputMods is queried with the instruction's *current*
    // operand count, so position in Inst (not in Operands) drives the check.
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

/// Core VOP3 conversion. Handles both the src-modifiers form (each source
/// carries a modifiers operand) and the plain form, records optional
/// immediates in OptionalIdx, appends clamp/omod when present, and finally
/// fixes up v_mac/v_fmac whose src2 is tied to the destination.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Convenience overload with a locally-scoped optional-immediate map.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}

/// VOP3P conversion: runs the plain VOP3 conversion, then materializes the
/// packed-math control operands (op_sel, op_sel_hi, neg_lo, neg_hi) and folds
/// their per-source bits into the corresponding srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // vdst_in is tied to the destination register.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed ops default op_sel_hi to all-ones (-1); unpacked default to 0.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each control mask corresponds to source operand J.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//
/// True if this operand is a valid 9-bit DPP control value: one of the
/// quad_perm / row_shl / row_shr / row_ror ranges or a discrete wave/row
/// shuffle or broadcast code.
bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

// Accepts any immediate representable in 16 bits, signed or unsigned.
bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}

/// Parse a DPP control operand. Accepted forms:
///   row_mirror / row_half_mirror            (bare keywords)
///   quad_perm:[a,b,c,d]   with 0 <= a..d <= 3 (packed 2 bits per lane)
///   row_shl:N / row_shr:N / row_ror:N       with 1 <= N <= 15
///   wave_shl:1 / wave_rol:1 / wave_shr:1 / wave_ror:1
///   row_bcast:15 / row_bcast:31
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrlOps from eating invalid tokens
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3))
        return MatchOperand_ParseFail;

      // Remaining three selectors occupy successive 2-bit fields.
      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3))
          return MatchOperand_ParseFail;
        const int shift = i*2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

// Default row_mask is 0xf (all rows enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

// Default bank_mask is 0xf (all banks enabled).
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}

/// Convert parsed DPP operands into an MCInst, duplicating tied operands
/// (old/src2 for MAC), skipping the textual "vcc" of VOP2b forms, and
/// appending row_mask/bank_mask/bound_ctrl with their defaults.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next slot of Inst is tied, satisfy it by copying the operand it
    // is tied to before consuming the next parsed operand.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

/// Parse an SDWA select operand of the form "<Prefix>:<SEL>" where SEL is one
/// of the BYTE_0..BYTE_3 / WORD_0..WORD_1 / DWORD keywords; the result is an
/// immediate of the given Type (dst_sel, src0_sel or src1_sel).
OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  // 0xffffffff is the sentinel for "keyword not recognized".
  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

/// Parse "dst_unused:<MODE>" where MODE is UNUSED_PAD, UNUSED_SEXT or
/// UNUSED_PRESERVE.
OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

// Thin per-format wrappers selecting the basic instruction class (and, for
// VOP2b/VOPC, whether the textual "vcc" operand must be skipped).

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}

/// Convert parsed SDWA operands into an MCInst. BasicInstType selects the
/// trailing optional-operand layout (VOP1/VOP2/VOPC); skipVcc requests
/// dropping the textual "vcc" operand of VOP2b/VOPC forms, which is implicit
/// in the encoding.
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
// Registers this asm parser for both the r600 and GCN targets so the MC
// registry can construct it at startup.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This fuction should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet token and fails to validate
  // operand. This method checks if we are given immediate operand but expect to
  // get corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//

/// Parse the optional 16-bit immediate of s_endpgm; absent operand defaults
/// to 0, an out-of-range value is a hard parse failure with a diagnostic.
OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional, if not present default to 0
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }