1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "Utils/AMDGPUAsmUtils.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/ADT/APFloat.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/BinaryFormat/ELF.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCExpr.h" 31 #include "llvm/MC/MCInst.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/MC/MCInstrInfo.h" 34 #include "llvm/MC/MCParser/MCAsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/AMDGPUMetadata.h" 44 #include "llvm/Support/AMDHSAKernelDescriptor.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/SMLoc.h" 51 #include "llvm/Support/TargetParser.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <cstdint> 57 #include <cstring> 58 #include <iterator> 59 #include <map> 60 #include <memory> 61 #include <string> 62 63 using namespace llvm; 64 using namespace llvm::AMDGPU; 65 using namespace llvm::amdhsa; 66 67 namespace { 68 69 class AMDGPUAsmParser; 70 71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; 72 73 //===----------------------------------------------------------------------===// 74 // Operand 75 //===----------------------------------------------------------------------===// 76 77 class AMDGPUOperand : public MCParsedAsmOperand { 78 enum KindTy { 79 Token, 80 Immediate, 81 Register, 82 Expression 83 } Kind; 84 85 SMLoc StartLoc, EndLoc; 86 const AMDGPUAsmParser *AsmParser; 87 88 public: 89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 91 92 using Ptr = std::unique_ptr<AMDGPUOperand>; 93 94 struct Modifiers { 95 bool Abs = false; 96 bool Neg = false; 97 bool Sext = false; 98 99 bool hasFPModifiers() const { return Abs || Neg; } 100 bool hasIntModifiers() const { return Sext; } 101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 102 103 int64_t getFPModifiersOperand() const { 104 int64_t Operand = 0; 105 Operand |= Abs ? SISrcMods::ABS : 0u; 106 Operand |= Neg ? 
                 SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyEndpgm,
    ImmTyHigh
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    if (Kind != Expression || !Expr)
      return false;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
218 return isa<MCSymbolRefExpr>(Expr); 219 } 220 221 bool isImm() const override { 222 return Kind == Immediate; 223 } 224 225 bool isInlinableImm(MVT type) const; 226 bool isLiteralImm(MVT type) const; 227 228 bool isRegKind() const { 229 return Kind == Register; 230 } 231 232 bool isReg() const override { 233 return isRegKind() && !hasModifiers(); 234 } 235 236 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 237 return isRegClass(RCID) || isInlinableImm(type); 238 } 239 240 bool isRegOrImmWithInt16InputMods() const { 241 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 242 } 243 244 bool isRegOrImmWithInt32InputMods() const { 245 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 246 } 247 248 bool isRegOrImmWithInt64InputMods() const { 249 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 250 } 251 252 bool isRegOrImmWithFP16InputMods() const { 253 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 254 } 255 256 bool isRegOrImmWithFP32InputMods() const { 257 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 258 } 259 260 bool isRegOrImmWithFP64InputMods() const { 261 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 262 } 263 264 bool isVReg() const { 265 return isRegClass(AMDGPU::VGPR_32RegClassID) || 266 isRegClass(AMDGPU::VReg_64RegClassID) || 267 isRegClass(AMDGPU::VReg_96RegClassID) || 268 isRegClass(AMDGPU::VReg_128RegClassID) || 269 isRegClass(AMDGPU::VReg_256RegClassID) || 270 isRegClass(AMDGPU::VReg_512RegClassID); 271 } 272 273 bool isVReg32() const { 274 return isRegClass(AMDGPU::VGPR_32RegClassID); 275 } 276 277 bool isVReg32OrOff() const { 278 return isOff() || isVReg32(); 279 } 280 281 bool isSDWAOperand(MVT type) const; 282 bool isSDWAFP16Operand() const; 283 bool isSDWAFP32Operand() const; 284 bool isSDWAInt16Operand() const; 285 bool isSDWAInt32Operand() const; 286 287 bool isImmTy(ImmTy ImmT) const { 288 return isImm() && Imm.Type == ImmT; 289 } 290 291 bool isImmModifier() const { 292 return isImm() && Imm.Type != ImmTyNone; 293 } 294 295 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 296 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 297 bool isDMask() const { return isImmTy(ImmTyDMask); } 298 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 299 bool isDA() const { return isImmTy(ImmTyDA); } 300 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 301 bool isLWE() const { return isImmTy(ImmTyLWE); } 302 bool isOff() const { return isImmTy(ImmTyOff); } 303 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 304 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 305 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 306 bool isOffen() const { return isImmTy(ImmTyOffen); } 307 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 308 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 309 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 310 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 311 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 312 313 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 314 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 315 bool isGDS() const { return isImmTy(ImmTyGDS); } 316 bool isLDS() const { return isImmTy(ImmTyLDS); } 317 bool isGLC() const { return isImmTy(ImmTyGLC); } 
318 bool isSLC() const { return isImmTy(ImmTySLC); } 319 bool isTFE() const { return isImmTy(ImmTyTFE); } 320 bool isD16() const { return isImmTy(ImmTyD16); } 321 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 322 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 323 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 324 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 325 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 326 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 327 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 328 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 329 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 330 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 331 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 332 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 333 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 334 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 335 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 336 bool isHigh() const { return isImmTy(ImmTyHigh); } 337 338 bool isMod() const { 339 return isClampSI() || isOModSI(); 340 } 341 342 bool isRegOrImm() const { 343 return isReg() || isImm(); 344 } 345 346 bool isRegClass(unsigned RCID) const; 347 348 bool isInlineValue() const; 349 350 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 351 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 352 } 353 354 bool isSCSrcB16() const { 355 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 356 } 357 358 bool isSCSrcV2B16() const { 359 return isSCSrcB16(); 360 } 361 362 bool isSCSrcB32() const { 363 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 364 } 365 366 bool isSCSrcB64() const { 367 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 368 } 369 370 bool isSCSrcF16() const { 371 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 372 } 373 374 bool isSCSrcV2F16() const { 375 return isSCSrcF16(); 376 } 377 378 bool isSCSrcF32() const { 379 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 380 } 381 382 bool isSCSrcF64() const { 383 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 384 } 385 386 bool isSSrcB32() const { 387 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 388 } 389 390 bool isSSrcB16() const { 391 return isSCSrcB16() || isLiteralImm(MVT::i16); 392 } 393 394 bool isSSrcV2B16() const { 395 llvm_unreachable("cannot happen"); 396 return isSSrcB16(); 397 } 398 399 bool isSSrcB64() const { 400 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 401 // See isVSrc64(). 
402 return isSCSrcB64() || isLiteralImm(MVT::i64); 403 } 404 405 bool isSSrcF32() const { 406 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 407 } 408 409 bool isSSrcF64() const { 410 return isSCSrcB64() || isLiteralImm(MVT::f64); 411 } 412 413 bool isSSrcF16() const { 414 return isSCSrcB16() || isLiteralImm(MVT::f16); 415 } 416 417 bool isSSrcV2F16() const { 418 llvm_unreachable("cannot happen"); 419 return isSSrcF16(); 420 } 421 422 bool isSSrcOrLdsB32() const { 423 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 424 isLiteralImm(MVT::i32) || isExpr(); 425 } 426 427 bool isVCSrcB32() const { 428 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 429 } 430 431 bool isVCSrcB64() const { 432 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 433 } 434 435 bool isVCSrcB16() const { 436 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 437 } 438 439 bool isVCSrcV2B16() const { 440 return isVCSrcB16(); 441 } 442 443 bool isVCSrcF32() const { 444 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 445 } 446 447 bool isVCSrcF64() const { 448 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 449 } 450 451 bool isVCSrcF16() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 453 } 454 455 bool isVCSrcV2F16() const { 456 return isVCSrcF16(); 457 } 458 459 bool isVSrcB32() const { 460 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 461 } 462 463 bool isVSrcB64() const { 464 return isVCSrcF64() || isLiteralImm(MVT::i64); 465 } 466 467 bool isVSrcB16() const { 468 return isVCSrcF16() || isLiteralImm(MVT::i16); 469 } 470 471 bool isVSrcV2B16() const { 472 llvm_unreachable("cannot happen"); 473 return isVSrcB16(); 474 } 475 476 bool isVSrcF32() const { 477 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 478 } 479 480 bool isVSrcF64() const { 481 return isVCSrcF64() || isLiteralImm(MVT::f64); 482 } 483 484 bool isVSrcF16() const { 485 return isVCSrcF16() || isLiteralImm(MVT::f16); 486 } 487 488 bool isVSrcV2F16() const { 489 llvm_unreachable("cannot happen"); 490 return isVSrcF16(); 491 } 492 493 bool isKImmFP32() const { 494 return isLiteralImm(MVT::f32); 495 } 496 497 bool isKImmFP16() const { 498 return isLiteralImm(MVT::f16); 499 } 500 501 bool isMem() const override { 502 return false; 503 } 504 505 bool isExpr() const { 506 return Kind == Expression; 507 } 508 509 bool isSoppBrTarget() const { 510 return isExpr() || isImm(); 511 } 512 513 bool isSWaitCnt() const; 514 bool isHwreg() const; 515 bool isSendMsg() const; 516 bool isSwizzle() const; 517 bool isSMRDOffset8() const; 518 bool isSMRDOffset20() const; 519 bool isSMRDLiteralOffset() const; 520 bool isDPPCtrl() const; 521 bool isGPRIdxMode() const; 522 bool isS16Imm() const; 523 bool isU16Imm() const; 524 bool isEndpgm() const; 525 526 StringRef getExpressionAsToken() const { 527 assert(isExpr()); 528 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 529 return S->getSymbol().getName(); 530 } 531 532 StringRef getToken() const { 533 assert(isToken()); 534 535 if (Kind == Expression) 536 return getExpressionAsToken(); 537 538 return StringRef(Tok.Data, Tok.Length); 539 } 540 541 int64_t getImm() const { 542 assert(isImm()); 543 return Imm.Val; 544 } 545 546 ImmTy getImmTy() const { 547 assert(isImm()); 548 return Imm.Type; 549 } 550 551 unsigned getReg() const override { 552 assert(isRegKind()); 553 return Reg.RegNo; 554 } 555 556 SMLoc getStartLoc() const override { 557 return StartLoc; 558 } 559 
560 SMLoc getEndLoc() const override { 561 return EndLoc; 562 } 563 564 SMRange getLocRange() const { 565 return SMRange(StartLoc, EndLoc); 566 } 567 568 Modifiers getModifiers() const { 569 assert(isRegKind() || isImmTy(ImmTyNone)); 570 return isRegKind() ? Reg.Mods : Imm.Mods; 571 } 572 573 void setModifiers(Modifiers Mods) { 574 assert(isRegKind() || isImmTy(ImmTyNone)); 575 if (isRegKind()) 576 Reg.Mods = Mods; 577 else 578 Imm.Mods = Mods; 579 } 580 581 bool hasModifiers() const { 582 return getModifiers().hasModifiers(); 583 } 584 585 bool hasFPModifiers() const { 586 return getModifiers().hasFPModifiers(); 587 } 588 589 bool hasIntModifiers() const { 590 return getModifiers().hasIntModifiers(); 591 } 592 593 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 594 595 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 596 597 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 598 599 template <unsigned Bitwidth> 600 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 601 602 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 603 addKImmFPOperands<16>(Inst, N); 604 } 605 606 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 607 addKImmFPOperands<32>(Inst, N); 608 } 609 610 void addRegOperands(MCInst &Inst, unsigned N) const; 611 612 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 613 if (isRegKind()) 614 addRegOperands(Inst, N); 615 else if (isExpr()) 616 Inst.addOperand(MCOperand::createExpr(Expr)); 617 else 618 addImmOperands(Inst, N); 619 } 620 621 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 622 Modifiers Mods = getModifiers(); 623 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 624 if (isRegKind()) { 625 addRegOperands(Inst, N); 626 } else { 627 addImmOperands(Inst, N, false); 628 } 629 } 630 631 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 632 assert(!hasIntModifiers()); 633 addRegOrImmWithInputModsOperands(Inst, N); 634 } 635 636 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 637 assert(!hasFPModifiers()); 638 addRegOrImmWithInputModsOperands(Inst, N); 639 } 640 641 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 642 Modifiers Mods = getModifiers(); 643 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 644 assert(isRegKind()); 645 addRegOperands(Inst, N); 646 } 647 648 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 649 assert(!hasIntModifiers()); 650 addRegWithInputModsOperands(Inst, N); 651 } 652 653 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 654 assert(!hasFPModifiers()); 655 addRegWithInputModsOperands(Inst, N); 656 } 657 658 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 659 if (isImm()) 660 addImmOperands(Inst, N); 661 else { 662 assert(isExpr()); 663 Inst.addOperand(MCOperand::createExpr(Expr)); 664 } 665 } 666 667 static void printImmTy(raw_ostream& OS, ImmTy Type) { 668 switch (Type) { 669 case ImmTyNone: OS << "None"; break; 670 case ImmTyGDS: OS << "GDS"; break; 671 case ImmTyLDS: OS << "LDS"; break; 672 case ImmTyOffen: OS << "Offen"; break; 673 case ImmTyIdxen: OS << "Idxen"; break; 674 case ImmTyAddr64: OS << "Addr64"; break; 675 case ImmTyOffset: OS << "Offset"; break; 676 case ImmTyInstOffset: OS << "InstOffset"; break; 677 case ImmTyOffset0: OS << "Offset0"; break; 678 case ImmTyOffset1: OS << "Offset1"; break; 679 case ImmTyGLC: OS << "GLC"; break; 680 
case ImmTySLC: OS << "SLC"; break; 681 case ImmTyTFE: OS << "TFE"; break; 682 case ImmTyD16: OS << "D16"; break; 683 case ImmTyFORMAT: OS << "FORMAT"; break; 684 case ImmTyClampSI: OS << "ClampSI"; break; 685 case ImmTyOModSI: OS << "OModSI"; break; 686 case ImmTyDppCtrl: OS << "DppCtrl"; break; 687 case ImmTyDppRowMask: OS << "DppRowMask"; break; 688 case ImmTyDppBankMask: OS << "DppBankMask"; break; 689 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 690 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 691 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 692 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 693 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 694 case ImmTyDMask: OS << "DMask"; break; 695 case ImmTyUNorm: OS << "UNorm"; break; 696 case ImmTyDA: OS << "DA"; break; 697 case ImmTyR128A16: OS << "R128A16"; break; 698 case ImmTyLWE: OS << "LWE"; break; 699 case ImmTyOff: OS << "Off"; break; 700 case ImmTyExpTgt: OS << "ExpTgt"; break; 701 case ImmTyExpCompr: OS << "ExpCompr"; break; 702 case ImmTyExpVM: OS << "ExpVM"; break; 703 case ImmTyHwreg: OS << "Hwreg"; break; 704 case ImmTySendMsg: OS << "SendMsg"; break; 705 case ImmTyInterpSlot: OS << "InterpSlot"; break; 706 case ImmTyInterpAttr: OS << "InterpAttr"; break; 707 case ImmTyAttrChan: OS << "AttrChan"; break; 708 case ImmTyOpSel: OS << "OpSel"; break; 709 case ImmTyOpSelHi: OS << "OpSelHi"; break; 710 case ImmTyNegLo: OS << "NegLo"; break; 711 case ImmTyNegHi: OS << "NegHi"; break; 712 case ImmTySwizzle: OS << "Swizzle"; break; 713 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 714 case ImmTyHigh: OS << "High"; break; 715 case ImmTyEndpgm: 716 OS << "Endpgm"; 717 break; 718 } 719 } 720 721 void print(raw_ostream &OS) const override { 722 switch (Kind) { 723 case Register: 724 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 725 break; 726 case Immediate: 727 OS << '<' << getImm(); 728 if (getImmTy() != ImmTyNone) { 729 OS << " type: "; printImmTy(OS, getImmTy()); 730 } 731 OS << " mods: " << Imm.Mods << '>'; 732 break; 733 case Token: 734 OS << '\'' << getToken() << '\''; 735 break; 736 case Expression: 737 OS << "<expr " << *Expr << '>'; 738 break; 739 } 740 } 741 742 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 743 int64_t Val, SMLoc Loc, 744 ImmTy Type = ImmTyNone, 745 bool IsFPImm = false) { 746 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 747 Op->Imm.Val = Val; 748 Op->Imm.IsFPImm = IsFPImm; 749 Op->Imm.Type = Type; 750 Op->Imm.Mods = Modifiers(); 751 Op->StartLoc = Loc; 752 Op->EndLoc = Loc; 753 return Op; 754 } 755 756 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 757 StringRef Str, SMLoc Loc, 758 bool HasExplicitEncodingSize = true) { 759 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 760 Res->Tok.Data = Str.data(); 761 Res->Tok.Length = Str.size(); 762 Res->StartLoc = Loc; 763 Res->EndLoc = Loc; 764 return Res; 765 } 766 767 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 768 unsigned RegNo, SMLoc S, 769 SMLoc E) { 770 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 771 Op->Reg.RegNo = RegNo; 772 Op->Reg.Mods = Modifiers(); 773 Op->StartLoc = S; 774 Op->EndLoc = E; 775 return Op; 776 } 777 778 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 779 const class MCExpr *Expr, SMLoc S) { 780 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 781 Op->Expr = Expr; 782 Op->StartLoc = S; 783 Op->EndLoc = S; 784 return Op; 785 } 
786 }; 787 788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 789 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 790 return OS; 791 } 792 793 //===----------------------------------------------------------------------===// 794 // AsmParser 795 //===----------------------------------------------------------------------===// 796 797 // Holds info related to the current kernel, e.g. count of SGPRs used. 798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 799 // .amdgpu_hsa_kernel or at EOF. 800 class KernelScopeInfo { 801 int SgprIndexUnusedMin = -1; 802 int VgprIndexUnusedMin = -1; 803 MCContext *Ctx = nullptr; 804 805 void usesSgprAt(int i) { 806 if (i >= SgprIndexUnusedMin) { 807 SgprIndexUnusedMin = ++i; 808 if (Ctx) { 809 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 810 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 811 } 812 } 813 } 814 815 void usesVgprAt(int i) { 816 if (i >= VgprIndexUnusedMin) { 817 VgprIndexUnusedMin = ++i; 818 if (Ctx) { 819 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 820 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 821 } 822 } 823 } 824 825 public: 826 KernelScopeInfo() = default; 827 828 void initialize(MCContext &Context) { 829 Ctx = &Context; 830 usesSgprAt(SgprIndexUnusedMin = -1); 831 usesVgprAt(VgprIndexUnusedMin = -1); 832 } 833 834 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 835 switch (RegKind) { 836 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 837 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 838 default: break; 839 } 840 } 841 }; 842 843 class AMDGPUAsmParser : public MCTargetAsmParser { 844 MCAsmParser &Parser; 845 846 // Number of extra operands parsed after the first optional operand. 847 // This may be necessary to skip hardcoded mandatory operands. 848 static const unsigned MAX_OPR_LOOKAHEAD = 8; 849 850 unsigned ForcedEncodingSize = 0; 851 bool ForcedDPP = false; 852 bool ForcedSDWA = false; 853 KernelScopeInfo KernelScope; 854 855 /// @name Auto-generated Match Functions 856 /// { 857 858 #define GET_ASSEMBLER_HEADER 859 #include "AMDGPUGenAsmMatcher.inc" 860 861 /// } 862 863 private: 864 bool ParseAsAbsoluteExpression(uint32_t &Ret); 865 bool OutOfRangeError(SMRange Range); 866 /// Calculate VGPR/SGPR blocks required for given target, reserved 867 /// registers, and user-specified NextFreeXGPR values. 868 /// 869 /// \param Features [in] Target features, used for bug corrections. 870 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 871 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 872 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 873 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 874 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 875 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 876 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 877 /// \param VGPRBlocks [out] Result VGPR block count. 878 /// \param SGPRBlocks [out] Result SGPR block count. 
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                           unsigned &RegNum, unsigned &RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
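      // Expose the ISA version to assembly code through pre-defined symbols;
      // the symbol names differ between code object v3 targets
      // (.amdgcn.gfx_generation_*) and earlier targets (.option.machine_version_*).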
947 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 948 MCContext &Ctx = getContext(); 949 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 950 MCSymbol *Sym = 951 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 952 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 953 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 954 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 955 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 956 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 957 } else { 958 MCSymbol *Sym = 959 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 960 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 961 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 962 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 963 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 964 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 965 } 966 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 967 initializeGprCountSymbol(IS_VGPR); 968 initializeGprCountSymbol(IS_SGPR); 969 } else 970 KernelScope.initialize(getContext()); 971 } 972 } 973 974 bool hasXNACK() const { 975 return AMDGPU::hasXNACK(getSTI()); 976 } 977 978 bool hasMIMG_R128() const { 979 return AMDGPU::hasMIMG_R128(getSTI()); 980 } 981 982 bool hasPackedD16() const { 983 return AMDGPU::hasPackedD16(getSTI()); 984 } 985 986 bool isSI() const { 987 return AMDGPU::isSI(getSTI()); 988 } 989 990 bool isCI() const { 991 return AMDGPU::isCI(getSTI()); 992 } 993 994 bool isVI() const { 995 return AMDGPU::isVI(getSTI()); 996 } 997 998 bool isGFX9() const { 999 return AMDGPU::isGFX9(getSTI()); 1000 } 1001 1002 bool isGFX10() const { 1003 return AMDGPU::isGFX10(getSTI()); 1004 } 1005 1006 bool hasInv2PiInlineImm() const { 1007 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1008 } 1009 1010 bool hasFlatOffsets() const { 1011 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1012 } 1013 1014 bool hasSGPR102_SGPR103() const { 1015 return !isVI() && !isGFX9(); 1016 } 1017 1018 bool hasSGPR104_SGPR105() const { 1019 return isGFX10(); 1020 } 1021 1022 bool hasIntClamp() const { 1023 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1024 } 1025 1026 AMDGPUTargetStreamer &getTargetStreamer() { 1027 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1028 return static_cast<AMDGPUTargetStreamer &>(TS); 1029 } 1030 1031 const MCRegisterInfo *getMRI() const { 1032 // We need this const_cast because for some reason getContext() is not const 1033 // in MCAsmParser. 
1034 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1035 } 1036 1037 const MCInstrInfo *getMII() const { 1038 return &MII; 1039 } 1040 1041 const FeatureBitset &getFeatureBits() const { 1042 return getSTI().getFeatureBits(); 1043 } 1044 1045 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1046 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1047 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1048 1049 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1050 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1051 bool isForcedDPP() const { return ForcedDPP; } 1052 bool isForcedSDWA() const { return ForcedSDWA; } 1053 ArrayRef<unsigned> getMatchedVariants() const; 1054 1055 std::unique_ptr<AMDGPUOperand> parseRegister(); 1056 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1057 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1058 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1059 unsigned Kind) override; 1060 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1061 OperandVector &Operands, MCStreamer &Out, 1062 uint64_t &ErrorInfo, 1063 bool MatchingInlineAsm) override; 1064 bool ParseDirective(AsmToken DirectiveID) override; 1065 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1066 StringRef parseMnemonicSuffix(StringRef Name); 1067 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1068 SMLoc NameLoc, OperandVector &Operands) override; 1069 //bool ProcessInstruction(MCInst &Inst); 1070 1071 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1072 1073 OperandMatchResultTy 1074 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1075 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1076 bool (*ConvertResult)(int64_t &) = nullptr); 1077 1078 OperandMatchResultTy parseOperandArrayWithPrefix( 1079 const char *Prefix, 1080 OperandVector &Operands, 1081 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1082 bool (*ConvertResult)(int64_t&) = nullptr); 1083 1084 OperandMatchResultTy 1085 parseNamedBit(const char *Name, OperandVector &Operands, 1086 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1087 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1088 StringRef &Value); 1089 1090 bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false); 1091 bool parseSP3NegModifier(); 1092 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1093 OperandMatchResultTy parseReg(OperandVector &Operands); 1094 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1095 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1096 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1097 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1098 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1099 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1100 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1101 1102 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1103 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1104 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1105 void cvtExp(MCInst &Inst, 
const OperandVector &Operands); 1106 1107 bool parseCnt(int64_t &IntVal); 1108 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1109 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1110 1111 private: 1112 struct OperandInfoTy { 1113 int64_t Id; 1114 bool IsSymbolic = false; 1115 1116 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1117 }; 1118 1119 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1120 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1121 1122 void errorExpTgt(); 1123 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1124 1125 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1126 bool validateSOPLiteral(const MCInst &Inst) const; 1127 bool validateConstantBusLimitations(const MCInst &Inst); 1128 bool validateEarlyClobberLimitations(const MCInst &Inst); 1129 bool validateIntClampSupported(const MCInst &Inst); 1130 bool validateMIMGAtomicDMask(const MCInst &Inst); 1131 bool validateMIMGGatherDMask(const MCInst &Inst); 1132 bool validateMIMGDataSize(const MCInst &Inst); 1133 bool validateMIMGD16(const MCInst &Inst); 1134 bool validateLdsDirect(const MCInst &Inst); 1135 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1136 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1137 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1138 1139 bool isId(const StringRef Id) const; 1140 bool isId(const AsmToken &Token, const StringRef Id) const; 1141 bool isToken(const AsmToken::TokenKind Kind) const; 1142 bool trySkipId(const StringRef Id); 1143 bool trySkipToken(const AsmToken::TokenKind Kind); 1144 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1145 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1146 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1147 AsmToken::TokenKind getTokenKind() const; 1148 bool parseExpr(int64_t &Imm); 1149 StringRef getTokenStr() const; 1150 AsmToken peekToken(); 1151 AsmToken getToken() const; 1152 SMLoc getLoc() const; 1153 void lex(); 1154 1155 public: 1156 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1157 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1158 1159 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1160 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1161 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1162 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1163 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1164 1165 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1166 const unsigned MinVal, 1167 const unsigned MaxVal, 1168 const StringRef ErrMsg); 1169 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1170 bool parseSwizzleOffset(int64_t &Imm); 1171 bool parseSwizzleMacro(int64_t &Imm); 1172 bool parseSwizzleQuadPerm(int64_t &Imm); 1173 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1174 bool parseSwizzleBroadcast(int64_t &Imm); 1175 bool parseSwizzleSwap(int64_t &Imm); 1176 bool parseSwizzleReverse(int64_t &Imm); 1177 1178 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1179 int64_t parseGPRIdxMacro(); 1180 1181 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1182 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1183 void cvtMubufAtomicReturn(MCInst &Inst, 
const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1184 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1185 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1186 1187 AMDGPUOperand::Ptr defaultGLC() const; 1188 AMDGPUOperand::Ptr defaultSLC() const; 1189 1190 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1191 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1192 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1193 AMDGPUOperand::Ptr defaultOffsetU12() const; 1194 AMDGPUOperand::Ptr defaultOffsetS13() const; 1195 1196 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1197 1198 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1199 OptionalImmIndexMap &OptionalIdx); 1200 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1201 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1202 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1203 1204 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1205 1206 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1207 bool IsAtomic = false); 1208 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1209 1210 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1211 AMDGPUOperand::Ptr defaultRowMask() const; 1212 AMDGPUOperand::Ptr defaultBankMask() const; 1213 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1214 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1215 1216 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1217 AMDGPUOperand::ImmTy Type); 1218 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1219 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1220 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1221 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1222 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1223 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1224 uint64_t BasicInstType, bool skipVcc = false); 1225 1226 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1227 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1228 }; 1229 1230 struct OptionalOperand { 1231 const char *Name; 1232 AMDGPUOperand::ImmTy Type; 1233 bool IsBit; 1234 bool (*ConvertResult)(int64_t&); 1235 }; 1236 1237 } // end anonymous namespace 1238 1239 // May be called with integer type with equivalent bitwidth. 
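// For example, a 4-byte literal operand maps to IEEE single-precision
// semantics below, whether the source token was written as an integer or in
// floating-point form.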
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the target operand's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss, but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like shared_base with both
  // 32-bit and 64-bit operands. Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not).
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got an fp literal token.
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand.
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if the single-precision literal is inlinable.
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got an int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand.
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got an int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got an fp literal token.
  if (type == MVT::f64) { // Expected 64-bit fp operand.
    // We would set the low 32 bits of the literal to zeroes, but we still
    // accept such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand.
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, type);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto &InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals.
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got an fp literal token.
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable.
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand.
        // For fp operands we check if the low 32 bits are zeros.
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
            "Can't encode literal as exact 64-bit floating-point operand. "
            "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in the predicate methods (isLiteralImm()).
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss, but not overflow or underflow. This should
      // have been checked earlier in isLiteralImm().

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got an int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      return;
    }

    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: {
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}

template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);

  if (!Imm.IsFPImm) {
    // We got an int literal token.
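    // Truncate the integer literal to the KImm field width (16 or 32 bits)
    // and emit it unchanged.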
1606 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1607 return; 1608 } 1609 1610 bool Lost; 1611 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1612 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1613 APFloat::rmNearestTiesToEven, &Lost); 1614 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1615 } 1616 1617 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1618 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1619 } 1620 1621 static bool isInlineValue(unsigned Reg) { 1622 switch (Reg) { 1623 case AMDGPU::SRC_SHARED_BASE: 1624 case AMDGPU::SRC_SHARED_LIMIT: 1625 case AMDGPU::SRC_PRIVATE_BASE: 1626 case AMDGPU::SRC_PRIVATE_LIMIT: 1627 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1628 return true; 1629 default: 1630 return false; 1631 } 1632 } 1633 1634 bool AMDGPUOperand::isInlineValue() const { 1635 return isRegKind() && ::isInlineValue(getReg()); 1636 } 1637 1638 //===----------------------------------------------------------------------===// 1639 // AsmParser 1640 //===----------------------------------------------------------------------===// 1641 1642 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1643 if (Is == IS_VGPR) { 1644 switch (RegWidth) { 1645 default: return -1; 1646 case 1: return AMDGPU::VGPR_32RegClassID; 1647 case 2: return AMDGPU::VReg_64RegClassID; 1648 case 3: return AMDGPU::VReg_96RegClassID; 1649 case 4: return AMDGPU::VReg_128RegClassID; 1650 case 8: return AMDGPU::VReg_256RegClassID; 1651 case 16: return AMDGPU::VReg_512RegClassID; 1652 } 1653 } else if (Is == IS_TTMP) { 1654 switch (RegWidth) { 1655 default: return -1; 1656 case 1: return AMDGPU::TTMP_32RegClassID; 1657 case 2: return AMDGPU::TTMP_64RegClassID; 1658 case 4: return AMDGPU::TTMP_128RegClassID; 1659 case 8: return AMDGPU::TTMP_256RegClassID; 1660 case 16: return AMDGPU::TTMP_512RegClassID; 1661 } 1662 } else if (Is == IS_SGPR) { 1663 switch (RegWidth) { 1664 default: return -1; 1665 case 1: return AMDGPU::SGPR_32RegClassID; 1666 case 2: return AMDGPU::SGPR_64RegClassID; 1667 case 4: return AMDGPU::SGPR_128RegClassID; 1668 case 8: return AMDGPU::SGPR_256RegClassID; 1669 case 16: return AMDGPU::SGPR_512RegClassID; 1670 } 1671 } 1672 return -1; 1673 } 1674 1675 static unsigned getSpecialRegForName(StringRef RegName) { 1676 return StringSwitch<unsigned>(RegName) 1677 .Case("exec", AMDGPU::EXEC) 1678 .Case("vcc", AMDGPU::VCC) 1679 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1680 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1681 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1682 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1683 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1684 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1685 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1686 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1687 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1688 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1689 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1690 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1691 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1692 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1693 .Case("m0", AMDGPU::M0) 1694 .Case("scc", AMDGPU::SCC) 1695 .Case("tba", AMDGPU::TBA) 1696 .Case("tma", AMDGPU::TMA) 1697 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1698 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1699 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1700 
.Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1701 .Case("vcc_lo", AMDGPU::VCC_LO) 1702 .Case("vcc_hi", AMDGPU::VCC_HI) 1703 .Case("exec_lo", AMDGPU::EXEC_LO) 1704 .Case("exec_hi", AMDGPU::EXEC_HI) 1705 .Case("tma_lo", AMDGPU::TMA_LO) 1706 .Case("tma_hi", AMDGPU::TMA_HI) 1707 .Case("tba_lo", AMDGPU::TBA_LO) 1708 .Case("tba_hi", AMDGPU::TBA_HI) 1709 .Case("null", AMDGPU::SGPR_NULL) 1710 .Default(0); 1711 } 1712 1713 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1714 SMLoc &EndLoc) { 1715 auto R = parseRegister(); 1716 if (!R) return true; 1717 assert(R->isReg()); 1718 RegNo = R->getReg(); 1719 StartLoc = R->getStartLoc(); 1720 EndLoc = R->getEndLoc(); 1721 return false; 1722 } 1723 1724 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1725 RegisterKind RegKind, unsigned Reg1, 1726 unsigned RegNum) { 1727 switch (RegKind) { 1728 case IS_SPECIAL: 1729 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1730 Reg = AMDGPU::EXEC; 1731 RegWidth = 2; 1732 return true; 1733 } 1734 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1735 Reg = AMDGPU::FLAT_SCR; 1736 RegWidth = 2; 1737 return true; 1738 } 1739 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1740 Reg = AMDGPU::XNACK_MASK; 1741 RegWidth = 2; 1742 return true; 1743 } 1744 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1745 Reg = AMDGPU::VCC; 1746 RegWidth = 2; 1747 return true; 1748 } 1749 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1750 Reg = AMDGPU::TBA; 1751 RegWidth = 2; 1752 return true; 1753 } 1754 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1755 Reg = AMDGPU::TMA; 1756 RegWidth = 2; 1757 return true; 1758 } 1759 return false; 1760 case IS_VGPR: 1761 case IS_SGPR: 1762 case IS_TTMP: 1763 if (Reg1 != Reg + RegWidth) { 1764 return false; 1765 } 1766 RegWidth++; 1767 return true; 1768 default: 1769 llvm_unreachable("unexpected register kind"); 1770 } 1771 } 1772 1773 static const StringRef Registers[] = { 1774 { "v" }, 1775 { "s" }, 1776 { "ttmp" }, 1777 }; 1778 1779 bool 1780 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1781 const AsmToken &NextToken) const { 1782 1783 // A list of consecutive registers: [s0,s1,s2,s3] 1784 if (Token.is(AsmToken::LBrac)) 1785 return true; 1786 1787 if (!Token.is(AsmToken::Identifier)) 1788 return false; 1789 1790 // A single register like s0 or a range of registers like s[0:1] 1791 1792 StringRef RegName = Token.getString(); 1793 1794 for (StringRef Reg : Registers) { 1795 if (RegName.startswith(Reg)) { 1796 if (Reg.size() < RegName.size()) { 1797 unsigned RegNum; 1798 // A single register with an index: rXX 1799 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1800 return true; 1801 } else { 1802 // A range of registers: r[XX:YY]. 
1803 if (NextToken.is(AsmToken::LBrac)) 1804 return true; 1805 } 1806 } 1807 } 1808 1809 return getSpecialRegForName(RegName); 1810 } 1811 1812 bool 1813 AMDGPUAsmParser::isRegister() 1814 { 1815 return isRegister(getToken(), peekToken()); 1816 } 1817 1818 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1819 unsigned &RegNum, unsigned &RegWidth, 1820 unsigned *DwordRegIndex) { 1821 if (DwordRegIndex) { *DwordRegIndex = 0; } 1822 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1823 if (getLexer().is(AsmToken::Identifier)) { 1824 StringRef RegName = Parser.getTok().getString(); 1825 if ((Reg = getSpecialRegForName(RegName))) { 1826 Parser.Lex(); 1827 RegKind = IS_SPECIAL; 1828 } else { 1829 unsigned RegNumIndex = 0; 1830 if (RegName[0] == 'v') { 1831 RegNumIndex = 1; 1832 RegKind = IS_VGPR; 1833 } else if (RegName[0] == 's') { 1834 RegNumIndex = 1; 1835 RegKind = IS_SGPR; 1836 } else if (RegName.startswith("ttmp")) { 1837 RegNumIndex = strlen("ttmp"); 1838 RegKind = IS_TTMP; 1839 } else { 1840 return false; 1841 } 1842 if (RegName.size() > RegNumIndex) { 1843 // Single 32-bit register: vXX. 1844 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1845 return false; 1846 Parser.Lex(); 1847 RegWidth = 1; 1848 } else { 1849 // Range of registers: v[XX:YY]. ":YY" is optional. 1850 Parser.Lex(); 1851 int64_t RegLo, RegHi; 1852 if (getLexer().isNot(AsmToken::LBrac)) 1853 return false; 1854 Parser.Lex(); 1855 1856 if (getParser().parseAbsoluteExpression(RegLo)) 1857 return false; 1858 1859 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1860 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1861 return false; 1862 Parser.Lex(); 1863 1864 if (isRBrace) { 1865 RegHi = RegLo; 1866 } else { 1867 if (getParser().parseAbsoluteExpression(RegHi)) 1868 return false; 1869 1870 if (getLexer().isNot(AsmToken::RBrac)) 1871 return false; 1872 Parser.Lex(); 1873 } 1874 RegNum = (unsigned) RegLo; 1875 RegWidth = (RegHi - RegLo) + 1; 1876 } 1877 } 1878 } else if (getLexer().is(AsmToken::LBrac)) { 1879 // List of consecutive registers: [s0,s1,s2,s3] 1880 Parser.Lex(); 1881 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1882 return false; 1883 if (RegWidth != 1) 1884 return false; 1885 RegisterKind RegKind1; 1886 unsigned Reg1, RegNum1, RegWidth1; 1887 do { 1888 if (getLexer().is(AsmToken::Comma)) { 1889 Parser.Lex(); 1890 } else if (getLexer().is(AsmToken::RBrac)) { 1891 Parser.Lex(); 1892 break; 1893 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1894 if (RegWidth1 != 1) { 1895 return false; 1896 } 1897 if (RegKind1 != RegKind) { 1898 return false; 1899 } 1900 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1901 return false; 1902 } 1903 } else { 1904 return false; 1905 } 1906 } while (true); 1907 } else { 1908 return false; 1909 } 1910 switch (RegKind) { 1911 case IS_SPECIAL: 1912 RegNum = 0; 1913 RegWidth = 1; 1914 break; 1915 case IS_VGPR: 1916 case IS_SGPR: 1917 case IS_TTMP: 1918 { 1919 unsigned Size = 1; 1920 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1921 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
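      // Illustrative example (not from the original source): s[2:3] is an
      // aligned 64-bit pair, whereas s[1:2] is rejected by the check below
      // because its first register is not a multiple of the pair size
      // (1 % 2 != 0).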
1922 Size = std::min(RegWidth, 4u); 1923 } 1924 if (RegNum % Size != 0) 1925 return false; 1926 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1927 RegNum = RegNum / Size; 1928 int RCID = getRegClass(RegKind, RegWidth); 1929 if (RCID == -1) 1930 return false; 1931 const MCRegisterClass RC = TRI->getRegClass(RCID); 1932 if (RegNum >= RC.getNumRegs()) 1933 return false; 1934 Reg = RC.getRegister(RegNum); 1935 break; 1936 } 1937 1938 default: 1939 llvm_unreachable("unexpected register kind"); 1940 } 1941 1942 if (!subtargetHasRegister(*TRI, Reg)) 1943 return false; 1944 return true; 1945 } 1946 1947 Optional<StringRef> 1948 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1949 switch (RegKind) { 1950 case IS_VGPR: 1951 return StringRef(".amdgcn.next_free_vgpr"); 1952 case IS_SGPR: 1953 return StringRef(".amdgcn.next_free_sgpr"); 1954 default: 1955 return None; 1956 } 1957 } 1958 1959 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1960 auto SymbolName = getGprCountSymbolName(RegKind); 1961 assert(SymbolName && "initializing invalid register kind"); 1962 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1963 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1964 } 1965 1966 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1967 unsigned DwordRegIndex, 1968 unsigned RegWidth) { 1969 // Symbols are only defined for GCN targets 1970 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1971 return true; 1972 1973 auto SymbolName = getGprCountSymbolName(RegKind); 1974 if (!SymbolName) 1975 return true; 1976 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1977 1978 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1979 int64_t OldCount; 1980 1981 if (!Sym->isVariable()) 1982 return !Error(getParser().getTok().getLoc(), 1983 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1984 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1985 return !Error( 1986 getParser().getTok().getLoc(), 1987 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1988 1989 if (OldCount <= NewMax) 1990 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1991 1992 return true; 1993 } 1994 1995 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1996 const auto &Tok = Parser.getTok(); 1997 SMLoc StartLoc = Tok.getLoc(); 1998 SMLoc EndLoc = Tok.getEndLoc(); 1999 RegisterKind RegKind; 2000 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2001 2002 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2003 //FIXME: improve error messages (bug 41303). 2004 Error(StartLoc, "not a valid operand."); 2005 return nullptr; 2006 } 2007 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2008 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2009 return nullptr; 2010 } else 2011 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2012 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2013 } 2014 2015 bool 2016 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) { 2017 if (HasSP3AbsModifier) { 2018 // This is a workaround for handling expressions 2019 // as arguments of SP3 'abs' modifier, for example: 2020 // |1.0| 2021 // |-1| 2022 // |1+x| 2023 // This syntax is not compatible with syntax of standard 2024 // MC expressions (due to the trailing '|'). 
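    // Sketch of the problem this avoids (illustrative only): for an operand
    // written as |-1|, the leading '|' has already been consumed, so the
    // generic parseAbsoluteExpression() would see "-1|" and try to treat the
    // trailing '|' as a bitwise-OR operator; parsePrimaryExpr() stops before
    // the '|' and leaves it for the caller to consume.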
2025 2026 SMLoc EndLoc; 2027 const MCExpr *Expr; 2028 SMLoc StartLoc = getLoc(); 2029 2030 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 2031 return true; 2032 } 2033 2034 if (!Expr->evaluateAsAbsolute(Val)) 2035 return Error(StartLoc, "expected absolute expression"); 2036 2037 return false; 2038 } 2039 2040 return getParser().parseAbsoluteExpression(Val); 2041 } 2042 2043 OperandMatchResultTy 2044 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2045 // TODO: add syntactic sugar for 1/(2*PI) 2046 2047 const auto& Tok = getToken(); 2048 const auto& NextTok = peekToken(); 2049 bool IsReal = Tok.is(AsmToken::Real); 2050 SMLoc S = Tok.getLoc(); 2051 bool Negate = false; 2052 2053 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2054 lex(); 2055 IsReal = true; 2056 Negate = true; 2057 } 2058 2059 if (IsReal) { 2060 // Floating-point expressions are not supported. 2061 // Can only allow floating-point literals with an 2062 // optional sign. 2063 2064 StringRef Num = getTokenStr(); 2065 lex(); 2066 2067 APFloat RealVal(APFloat::IEEEdouble()); 2068 auto roundMode = APFloat::rmNearestTiesToEven; 2069 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2070 return MatchOperand_ParseFail; 2071 } 2072 if (Negate) 2073 RealVal.changeSign(); 2074 2075 Operands.push_back( 2076 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2077 AMDGPUOperand::ImmTyNone, true)); 2078 2079 return MatchOperand_Success; 2080 2081 // FIXME: Should enable arbitrary expressions here 2082 } else if (Tok.is(AsmToken::Integer) || 2083 (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){ 2084 2085 int64_t IntVal; 2086 if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier)) 2087 return MatchOperand_ParseFail; 2088 2089 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2090 return MatchOperand_Success; 2091 } 2092 2093 return MatchOperand_NoMatch; 2094 } 2095 2096 OperandMatchResultTy 2097 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2098 if (!isRegister()) 2099 return MatchOperand_NoMatch; 2100 2101 if (auto R = parseRegister()) { 2102 assert(R->isReg()); 2103 Operands.push_back(std::move(R)); 2104 return MatchOperand_Success; 2105 } 2106 return MatchOperand_ParseFail; 2107 } 2108 2109 OperandMatchResultTy 2110 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2111 auto res = parseReg(Operands); 2112 return (res == MatchOperand_NoMatch)? 2113 parseImm(Operands, HasSP3AbsMod) : 2114 res; 2115 } 2116 2117 // Check if the current token is an SP3 'neg' modifier. 2118 // Currently this modifier is allowed in the following context: 2119 // 2120 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2121 // 2. Before an 'abs' modifier: -abs(...) 2122 // 3. Before an SP3 'abs' modifier: -|...| 2123 // 2124 // In all other cases "-" is handled as a part 2125 // of an expression that follows the sign. 2126 // 2127 // Note: When "-" is followed by an integer literal, 2128 // this is interpreted as integer negation rather 2129 // than a floating-point NEG modifier applied to N. 
2130 // Besides being counter-intuitive, such use of a floating-point 2131 // NEG modifier would have resulted in a different meaning 2132 // of integer literals used with VOP1/2/C and VOP3, 2133 // for example: 2134 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2135 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2136 // Negative fp literals with preceding "-" are 2137 // handled likewise for uniformity. 2138 // 2139 bool 2140 AMDGPUAsmParser::parseSP3NegModifier() { 2141 2142 AsmToken NextToken[2]; 2143 peekTokens(NextToken); 2144 2145 if (isToken(AsmToken::Minus) && 2146 (isRegister(NextToken[0], NextToken[1]) || 2147 NextToken[0].is(AsmToken::Pipe) || 2148 isId(NextToken[0], "abs"))) { 2149 lex(); 2150 return true; 2151 } 2152 2153 return false; 2154 } 2155 2156 OperandMatchResultTy 2157 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2158 bool AllowImm) { 2159 bool Neg, SP3Neg; 2160 bool Abs, SP3Abs; 2161 SMLoc Loc; 2162 2163 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2164 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2165 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2166 return MatchOperand_ParseFail; 2167 } 2168 2169 SP3Neg = parseSP3NegModifier(); 2170 2171 Loc = getLoc(); 2172 Neg = trySkipId("neg"); 2173 if (Neg && SP3Neg) { 2174 Error(Loc, "expected register or immediate"); 2175 return MatchOperand_ParseFail; 2176 } 2177 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2178 return MatchOperand_ParseFail; 2179 2180 Abs = trySkipId("abs"); 2181 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2182 return MatchOperand_ParseFail; 2183 2184 Loc = getLoc(); 2185 SP3Abs = trySkipToken(AsmToken::Pipe); 2186 if (Abs && SP3Abs) { 2187 Error(Loc, "expected register or immediate"); 2188 return MatchOperand_ParseFail; 2189 } 2190 2191 OperandMatchResultTy Res; 2192 if (AllowImm) { 2193 Res = parseRegOrImm(Operands, SP3Abs); 2194 } else { 2195 Res = parseReg(Operands); 2196 } 2197 if (Res != MatchOperand_Success) { 2198 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res; 2199 } 2200 2201 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2202 return MatchOperand_ParseFail; 2203 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2204 return MatchOperand_ParseFail; 2205 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2206 return MatchOperand_ParseFail; 2207 2208 AMDGPUOperand::Modifiers Mods; 2209 Mods.Abs = Abs || SP3Abs; 2210 Mods.Neg = Neg || SP3Neg; 2211 2212 if (Mods.hasFPModifiers()) { 2213 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2214 Op.setModifiers(Mods); 2215 } 2216 return MatchOperand_Success; 2217 } 2218 2219 OperandMatchResultTy 2220 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2221 bool AllowImm) { 2222 bool Sext = trySkipId("sext"); 2223 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2224 return MatchOperand_ParseFail; 2225 2226 OperandMatchResultTy Res; 2227 if (AllowImm) { 2228 Res = parseRegOrImm(Operands); 2229 } else { 2230 Res = parseReg(Operands); 2231 } 2232 if (Res != MatchOperand_Success) { 2233 return Sext?
MatchOperand_ParseFail : Res; 2234 } 2235 2236 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2237 return MatchOperand_ParseFail; 2238 2239 AMDGPUOperand::Modifiers Mods; 2240 Mods.Sext = Sext; 2241 2242 if (Mods.hasIntModifiers()) { 2243 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2244 Op.setModifiers(Mods); 2245 } 2246 2247 return MatchOperand_Success; 2248 } 2249 2250 OperandMatchResultTy 2251 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2252 return parseRegOrImmWithFPInputMods(Operands, false); 2253 } 2254 2255 OperandMatchResultTy 2256 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2257 return parseRegOrImmWithIntInputMods(Operands, false); 2258 } 2259 2260 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2261 auto Loc = getLoc(); 2262 if (trySkipId("off")) { 2263 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2264 AMDGPUOperand::ImmTyOff, false)); 2265 return MatchOperand_Success; 2266 } 2267 2268 if (!isRegister()) 2269 return MatchOperand_NoMatch; 2270 2271 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2272 if (Reg) { 2273 Operands.push_back(std::move(Reg)); 2274 return MatchOperand_Success; 2275 } 2276 2277 return MatchOperand_ParseFail; 2278 2279 } 2280 2281 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2282 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2283 2284 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2285 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2286 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2287 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2288 return Match_InvalidOperand; 2289 2290 if ((TSFlags & SIInstrFlags::VOP3) && 2291 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2292 getForcedEncodingSize() != 64) 2293 return Match_PreferE32; 2294 2295 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2296 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2297 // v_mac_f32/16 allow only dst_sel == DWORD; 2298 auto OpNum = 2299 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2300 const auto &Op = Inst.getOperand(OpNum); 2301 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2302 return Match_InvalidOperand; 2303 } 2304 } 2305 2306 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2307 // FIXME: Produces error without correct column reported. 
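    // Illustrative example (not from the original source): on a target without
    // flat instruction offsets, "flat_load_dword v0, v[1:2] offset:8" is
    // rejected here because the parsed offset operand is non-zero.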
2308 auto OpNum = 2309 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2310 const auto &Op = Inst.getOperand(OpNum); 2311 if (Op.getImm() != 0) 2312 return Match_InvalidOperand; 2313 } 2314 2315 return Match_Success; 2316 } 2317 2318 // Which asm variants we should check 2319 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2320 if (getForcedEncodingSize() == 32) { 2321 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2322 return makeArrayRef(Variants); 2323 } 2324 2325 if (isForcedVOP3()) { 2326 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2327 return makeArrayRef(Variants); 2328 } 2329 2330 if (isForcedSDWA()) { 2331 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2332 AMDGPUAsmVariants::SDWA9}; 2333 return makeArrayRef(Variants); 2334 } 2335 2336 if (isForcedDPP()) { 2337 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2338 return makeArrayRef(Variants); 2339 } 2340 2341 static const unsigned Variants[] = { 2342 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2343 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2344 }; 2345 2346 return makeArrayRef(Variants); 2347 } 2348 2349 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2350 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2351 const unsigned Num = Desc.getNumImplicitUses(); 2352 for (unsigned i = 0; i < Num; ++i) { 2353 unsigned Reg = Desc.ImplicitUses[i]; 2354 switch (Reg) { 2355 case AMDGPU::FLAT_SCR: 2356 case AMDGPU::VCC: 2357 case AMDGPU::VCC_LO: 2358 case AMDGPU::VCC_HI: 2359 case AMDGPU::M0: 2360 case AMDGPU::SGPR_NULL: 2361 return Reg; 2362 default: 2363 break; 2364 } 2365 } 2366 return AMDGPU::NoRegister; 2367 } 2368 2369 // NB: This code is correct only when used to check constant 2370 // bus limitations because GFX7 supports no f16 inline constants. 2371 // Note that there are no cases when a GFX7 opcode violates 2372 // constant bus limitations due to the use of an f16 constant.
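// For reference (illustrative, not exhaustive): inlinable values are the small
// integers -16..64 plus a handful of floats (0.5, -0.5, 1.0, -1.0, 2.0, -2.0,
// 4.0, -4.0 and, where supported, 1/(2*pi)); any other immediate has to be
// encoded as a literal and therefore counts against the constant bus limit.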
2373 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2374 unsigned OpIdx) const { 2375 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2376 2377 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2378 return false; 2379 } 2380 2381 const MCOperand &MO = Inst.getOperand(OpIdx); 2382 2383 int64_t Val = MO.getImm(); 2384 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2385 2386 switch (OpSize) { // expected operand size 2387 case 8: 2388 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2389 case 4: 2390 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2391 case 2: { 2392 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2393 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2394 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2395 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2396 } else { 2397 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2398 } 2399 } 2400 default: 2401 llvm_unreachable("invalid operand size"); 2402 } 2403 } 2404 2405 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2406 const MCOperand &MO = Inst.getOperand(OpIdx); 2407 if (MO.isImm()) { 2408 return !isInlineConstant(Inst, OpIdx); 2409 } 2410 return !MO.isReg() || 2411 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2412 } 2413 2414 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2415 const unsigned Opcode = Inst.getOpcode(); 2416 const MCInstrDesc &Desc = MII.get(Opcode); 2417 unsigned ConstantBusUseCount = 0; 2418 2419 if (Desc.TSFlags & 2420 (SIInstrFlags::VOPC | 2421 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2422 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2423 SIInstrFlags::SDWA)) { 2424 // Check special imm operands (used by madmk, etc) 2425 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2426 ++ConstantBusUseCount; 2427 } 2428 2429 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2430 if (SGPRUsed != AMDGPU::NoRegister) { 2431 ++ConstantBusUseCount; 2432 } 2433 2434 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2435 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2436 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2437 2438 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2439 2440 for (int OpIdx : OpIndices) { 2441 if (OpIdx == -1) break; 2442 2443 const MCOperand &MO = Inst.getOperand(OpIdx); 2444 if (usesConstantBus(Inst, OpIdx)) { 2445 if (MO.isReg()) { 2446 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2447 // Pairs of registers with a partial intersection like these 2448 // s0, s[0:1] 2449 // flat_scratch_lo, flat_scratch 2450 // flat_scratch_lo, flat_scratch_hi 2451 // are theoretically valid but they are disabled anyway.
2452 // Note that this code mimics SIInstrInfo::verifyInstruction 2453 if (Reg != SGPRUsed) { 2454 ++ConstantBusUseCount; 2455 } 2456 SGPRUsed = Reg; 2457 } else { // Expression or a literal 2458 ++ConstantBusUseCount; 2459 } 2460 } 2461 } 2462 } 2463 2464 return ConstantBusUseCount <= 1; 2465 } 2466 2467 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2468 const unsigned Opcode = Inst.getOpcode(); 2469 const MCInstrDesc &Desc = MII.get(Opcode); 2470 2471 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2472 if (DstIdx == -1 || 2473 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2474 return true; 2475 } 2476 2477 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2478 2479 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2480 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2481 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2482 2483 assert(DstIdx != -1); 2484 const MCOperand &Dst = Inst.getOperand(DstIdx); 2485 assert(Dst.isReg()); 2486 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2487 2488 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2489 2490 for (int SrcIdx : SrcIndices) { 2491 if (SrcIdx == -1) break; 2492 const MCOperand &Src = Inst.getOperand(SrcIdx); 2493 if (Src.isReg()) { 2494 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2495 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2496 return false; 2497 } 2498 } 2499 } 2500 2501 return true; 2502 } 2503 2504 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2505 2506 const unsigned Opc = Inst.getOpcode(); 2507 const MCInstrDesc &Desc = MII.get(Opc); 2508 2509 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2510 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2511 assert(ClampIdx != -1); 2512 return Inst.getOperand(ClampIdx).getImm() == 0; 2513 } 2514 2515 return true; 2516 } 2517 2518 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2519 2520 const unsigned Opc = Inst.getOpcode(); 2521 const MCInstrDesc &Desc = MII.get(Opc); 2522 2523 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2524 return true; 2525 2526 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2527 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2528 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2529 2530 assert(VDataIdx != -1); 2531 assert(DMaskIdx != -1); 2532 assert(TFEIdx != -1); 2533 2534 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2535 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2536 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2537 if (DMask == 0) 2538 DMask = 1; 2539 2540 unsigned DataSize = 2541 (Desc.TSFlags & SIInstrFlags::Gather4) ? 
4 : countPopulation(DMask); 2542 if (hasPackedD16()) { 2543 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2544 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2545 DataSize = (DataSize + 1) / 2; 2546 } 2547 2548 return (VDataSize / 4) == DataSize + TFESize; 2549 } 2550 2551 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2552 2553 const unsigned Opc = Inst.getOpcode(); 2554 const MCInstrDesc &Desc = MII.get(Opc); 2555 2556 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2557 return true; 2558 if (!Desc.mayLoad() || !Desc.mayStore()) 2559 return true; // Not atomic 2560 2561 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2562 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2563 2564 // This is an incomplete check because image_atomic_cmpswap 2565 // may only use 0x3 and 0xf while other atomic operations 2566 // may use 0x1 and 0x3. However these limitations are 2567 // verified when we check that dmask matches dst size. 2568 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2569 } 2570 2571 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2572 2573 const unsigned Opc = Inst.getOpcode(); 2574 const MCInstrDesc &Desc = MII.get(Opc); 2575 2576 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2577 return true; 2578 2579 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2580 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2581 2582 // GATHER4 instructions use dmask in a different fashion compared to 2583 // other MIMG instructions. The only useful DMASK values are 2584 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2585 // (red,red,red,red) etc.) The ISA document doesn't mention 2586 // this. 2587 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2588 } 2589 2590 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2591 2592 const unsigned Opc = Inst.getOpcode(); 2593 const MCInstrDesc &Desc = MII.get(Opc); 2594 2595 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2596 return true; 2597 2598 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2599 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2600 if (isCI() || isSI()) 2601 return false; 2602 } 2603 2604 return true; 2605 } 2606 2607 static bool IsRevOpcode(const unsigned Opcode) 2608 { 2609 switch (Opcode) { 2610 case AMDGPU::V_SUBREV_F32_e32: 2611 case AMDGPU::V_SUBREV_F32_e64: 2612 case AMDGPU::V_SUBREV_F32_e32_si: 2613 case AMDGPU::V_SUBREV_F32_e32_vi: 2614 case AMDGPU::V_SUBREV_F32_e64_si: 2615 case AMDGPU::V_SUBREV_F32_e64_vi: 2616 case AMDGPU::V_SUBREV_I32_e32: 2617 case AMDGPU::V_SUBREV_I32_e64: 2618 case AMDGPU::V_SUBREV_I32_e32_si: 2619 case AMDGPU::V_SUBREV_I32_e64_si: 2620 case AMDGPU::V_SUBBREV_U32_e32: 2621 case AMDGPU::V_SUBBREV_U32_e64: 2622 case AMDGPU::V_SUBBREV_U32_e32_si: 2623 case AMDGPU::V_SUBBREV_U32_e32_vi: 2624 case AMDGPU::V_SUBBREV_U32_e64_si: 2625 case AMDGPU::V_SUBBREV_U32_e64_vi: 2626 case AMDGPU::V_SUBREV_U32_e32: 2627 case AMDGPU::V_SUBREV_U32_e64: 2628 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2629 case AMDGPU::V_SUBREV_U32_e32_vi: 2630 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2631 case AMDGPU::V_SUBREV_U32_e64_vi: 2632 case AMDGPU::V_SUBREV_F16_e32: 2633 case AMDGPU::V_SUBREV_F16_e64: 2634 case AMDGPU::V_SUBREV_F16_e32_vi: 2635 case AMDGPU::V_SUBREV_F16_e64_vi: 2636 case AMDGPU::V_SUBREV_U16_e32: 2637 case AMDGPU::V_SUBREV_U16_e64: 2638 case AMDGPU::V_SUBREV_U16_e32_vi: 2639 case AMDGPU::V_SUBREV_U16_e64_vi: 2640 case 
AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2641 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2642 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2643 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2644 case AMDGPU::V_LSHLREV_B32_e32_si: 2645 case AMDGPU::V_LSHLREV_B32_e64_si: 2646 case AMDGPU::V_LSHLREV_B16_e32_vi: 2647 case AMDGPU::V_LSHLREV_B16_e64_vi: 2648 case AMDGPU::V_LSHLREV_B32_e32_vi: 2649 case AMDGPU::V_LSHLREV_B32_e64_vi: 2650 case AMDGPU::V_LSHLREV_B64_vi: 2651 case AMDGPU::V_LSHRREV_B32_e32_si: 2652 case AMDGPU::V_LSHRREV_B32_e64_si: 2653 case AMDGPU::V_LSHRREV_B16_e32_vi: 2654 case AMDGPU::V_LSHRREV_B16_e64_vi: 2655 case AMDGPU::V_LSHRREV_B32_e32_vi: 2656 case AMDGPU::V_LSHRREV_B32_e64_vi: 2657 case AMDGPU::V_LSHRREV_B64_vi: 2658 case AMDGPU::V_ASHRREV_I32_e64_si: 2659 case AMDGPU::V_ASHRREV_I32_e32_si: 2660 case AMDGPU::V_ASHRREV_I16_e32_vi: 2661 case AMDGPU::V_ASHRREV_I16_e64_vi: 2662 case AMDGPU::V_ASHRREV_I32_e32_vi: 2663 case AMDGPU::V_ASHRREV_I32_e64_vi: 2664 case AMDGPU::V_ASHRREV_I64_vi: 2665 case AMDGPU::V_PK_LSHLREV_B16_vi: 2666 case AMDGPU::V_PK_LSHRREV_B16_vi: 2667 case AMDGPU::V_PK_ASHRREV_I16_vi: 2668 return true; 2669 default: 2670 return false; 2671 } 2672 } 2673 2674 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2675 2676 using namespace SIInstrFlags; 2677 const unsigned Opcode = Inst.getOpcode(); 2678 const MCInstrDesc &Desc = MII.get(Opcode); 2679 2680 // lds_direct register is defined so that it can be used 2681 // with 9-bit operands only. Ignore encodings which do not accept these. 2682 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2683 return true; 2684 2685 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2686 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2687 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2688 2689 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2690 2691 // lds_direct cannot be specified as either src1 or src2. 2692 for (int SrcIdx : SrcIndices) { 2693 if (SrcIdx == -1) break; 2694 const MCOperand &Src = Inst.getOperand(SrcIdx); 2695 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2696 return false; 2697 } 2698 } 2699 2700 if (Src0Idx == -1) 2701 return true; 2702 2703 const MCOperand &Src = Inst.getOperand(Src0Idx); 2704 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2705 return true; 2706 2707 // lds_direct is specified as src0. Check additional limitations. 
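  // Illustrative examples (not from the original source):
  //   v_mov_b32 v0, lds_direct        // accepted: lds_direct is src0 of a
  //                                   // plain VOP1 encoding
  //   v_add_f32 v0, v1, lds_direct    // rejected above: lds_direct is src1
  // SDWA encodings and *rev opcodes with lds_direct as src0 are rejected by
  // the check below.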
2708 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2709 } 2710 2711 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2712 unsigned Opcode = Inst.getOpcode(); 2713 const MCInstrDesc &Desc = MII.get(Opcode); 2714 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2715 return true; 2716 2717 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2718 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2719 2720 const int OpIndices[] = { Src0Idx, Src1Idx }; 2721 2722 unsigned NumLiterals = 0; 2723 uint32_t LiteralValue; 2724 2725 for (int OpIdx : OpIndices) { 2726 if (OpIdx == -1) break; 2727 2728 const MCOperand &MO = Inst.getOperand(OpIdx); 2729 if (MO.isImm() && 2730 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2731 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2732 !isInlineConstant(Inst, OpIdx)) { 2733 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2734 if (NumLiterals == 0 || LiteralValue != Value) { 2735 LiteralValue = Value; 2736 ++NumLiterals; 2737 } 2738 } 2739 } 2740 2741 return NumLiterals <= 1; 2742 } 2743 2744 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2745 const SMLoc &IDLoc) { 2746 if (!validateLdsDirect(Inst)) { 2747 Error(IDLoc, 2748 "invalid use of lds_direct"); 2749 return false; 2750 } 2751 if (!validateSOPLiteral(Inst)) { 2752 Error(IDLoc, 2753 "only one literal operand is allowed"); 2754 return false; 2755 } 2756 if (!validateConstantBusLimitations(Inst)) { 2757 Error(IDLoc, 2758 "invalid operand (violates constant bus restrictions)"); 2759 return false; 2760 } 2761 if (!validateEarlyClobberLimitations(Inst)) { 2762 Error(IDLoc, 2763 "destination must be different than all sources"); 2764 return false; 2765 } 2766 if (!validateIntClampSupported(Inst)) { 2767 Error(IDLoc, 2768 "integer clamping is not supported on this GPU"); 2769 return false; 2770 } 2771 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2772 if (!validateMIMGD16(Inst)) { 2773 Error(IDLoc, 2774 "d16 modifier is not supported on this GPU"); 2775 return false; 2776 } 2777 if (!validateMIMGDataSize(Inst)) { 2778 Error(IDLoc, 2779 "image data size does not match dmask and tfe"); 2780 return false; 2781 } 2782 if (!validateMIMGAtomicDMask(Inst)) { 2783 Error(IDLoc, 2784 "invalid atomic image dmask"); 2785 return false; 2786 } 2787 if (!validateMIMGGatherDMask(Inst)) { 2788 Error(IDLoc, 2789 "invalid image_gather dmask: only one bit must be set"); 2790 return false; 2791 } 2792 2793 return true; 2794 } 2795 2796 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 2797 const FeatureBitset &FBS, 2798 unsigned VariantID = 0); 2799 2800 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2801 OperandVector &Operands, 2802 MCStreamer &Out, 2803 uint64_t &ErrorInfo, 2804 bool MatchingInlineAsm) { 2805 MCInst Inst; 2806 unsigned Result = Match_Success; 2807 for (auto Variant : getMatchedVariants()) { 2808 uint64_t EI; 2809 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2810 Variant); 2811 // We order match statuses from least to most specific. 
We use the most specific 2812 // status as the result: 2813 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2814 if ((R == Match_Success) || 2815 (R == Match_PreferE32) || 2816 (R == Match_MissingFeature && Result != Match_PreferE32) || 2817 (R == Match_InvalidOperand && Result != Match_MissingFeature 2818 && Result != Match_PreferE32) || 2819 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2820 && Result != Match_MissingFeature 2821 && Result != Match_PreferE32)) { 2822 Result = R; 2823 ErrorInfo = EI; 2824 } 2825 if (R == Match_Success) 2826 break; 2827 } 2828 2829 switch (Result) { 2830 default: break; 2831 case Match_Success: 2832 if (!validateInstruction(Inst, IDLoc)) { 2833 return true; 2834 } 2835 Inst.setLoc(IDLoc); 2836 Out.EmitInstruction(Inst, getSTI()); 2837 return false; 2838 2839 case Match_MissingFeature: 2840 return Error(IDLoc, "instruction not supported on this GPU"); 2841 2842 case Match_MnemonicFail: { 2843 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2844 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2845 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2846 return Error(IDLoc, "invalid instruction" + Suggestion, 2847 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2848 } 2849 2850 case Match_InvalidOperand: { 2851 SMLoc ErrorLoc = IDLoc; 2852 if (ErrorInfo != ~0ULL) { 2853 if (ErrorInfo >= Operands.size()) { 2854 return Error(IDLoc, "too few operands for instruction"); 2855 } 2856 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2857 if (ErrorLoc == SMLoc()) 2858 ErrorLoc = IDLoc; 2859 } 2860 return Error(ErrorLoc, "invalid operand for instruction"); 2861 } 2862 2863 case Match_PreferE32: 2864 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2865 "should be encoded as e32"); 2866 } 2867 llvm_unreachable("Implement any new match types added!"); 2868 } 2869 2870 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2871 int64_t Tmp = -1; 2872 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2873 return true; 2874 } 2875 if (getParser().parseAbsoluteExpression(Tmp)) { 2876 return true; 2877 } 2878 Ret = static_cast<uint32_t>(Tmp); 2879 return false; 2880 } 2881 2882 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2883 uint32_t &Minor) { 2884 if (ParseAsAbsoluteExpression(Major)) 2885 return TokError("invalid major version"); 2886 2887 if (getLexer().isNot(AsmToken::Comma)) 2888 return TokError("minor version number required, comma expected"); 2889 Lex(); 2890 2891 if (ParseAsAbsoluteExpression(Minor)) 2892 return TokError("invalid minor version"); 2893 2894 return false; 2895 } 2896 2897 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2898 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2899 return TokError("directive only supported for amdgcn architecture"); 2900 2901 std::string Target; 2902 2903 SMLoc TargetStart = getTok().getLoc(); 2904 if (getParser().parseEscapedString(Target)) 2905 return true; 2906 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2907 2908 std::string ExpectedTarget; 2909 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2910 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2911 2912 if (Target != ExpectedTargetOS.str()) 2913 return getParser().Error(TargetRange.Start, "target must match options", 2914 TargetRange); 2915 2916 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2917 return false; 2918 } 2919 2920 bool
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2921 return getParser().Error(Range.Start, "value out of range", Range); 2922 } 2923 2924 bool AMDGPUAsmParser::calculateGPRBlocks( 2925 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2926 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2927 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2928 unsigned &SGPRBlocks) { 2929 // TODO(scott.linder): These calculations are duplicated from 2930 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2931 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2932 2933 unsigned NumVGPRs = NextFreeVGPR; 2934 unsigned NumSGPRs = NextFreeSGPR; 2935 2936 if (Version.Major >= 10) 2937 NumSGPRs = 0; 2938 else { 2939 unsigned MaxAddressableNumSGPRs = 2940 IsaInfo::getAddressableNumSGPRs(&getSTI()); 2941 2942 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2943 NumSGPRs > MaxAddressableNumSGPRs) 2944 return OutOfRangeError(SGPRRange); 2945 2946 NumSGPRs += 2947 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2948 2949 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2950 NumSGPRs > MaxAddressableNumSGPRs) 2951 return OutOfRangeError(SGPRRange); 2952 2953 if (Features.test(FeatureSGPRInitBug)) 2954 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2955 } 2956 2957 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2958 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2959 2960 return false; 2961 } 2962 2963 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2964 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2965 return TokError("directive only supported for amdgcn architecture"); 2966 2967 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2968 return TokError("directive only supported for amdhsa OS"); 2969 2970 StringRef KernelName; 2971 if (getParser().parseIdentifier(KernelName)) 2972 return true; 2973 2974 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 2975 2976 StringSet<> Seen; 2977 2978 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2979 2980 SMRange VGPRRange; 2981 uint64_t NextFreeVGPR = 0; 2982 SMRange SGPRRange; 2983 uint64_t NextFreeSGPR = 0; 2984 unsigned UserSGPRCount = 0; 2985 bool ReserveVCC = true; 2986 bool ReserveFlatScr = true; 2987 bool ReserveXNACK = hasXNACK(); 2988 2989 while (true) { 2990 while (getLexer().is(AsmToken::EndOfStatement)) 2991 Lex(); 2992 2993 if (getLexer().isNot(AsmToken::Identifier)) 2994 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2995 2996 StringRef ID = getTok().getIdentifier(); 2997 SMRange IDRange = getTok().getLocRange(); 2998 Lex(); 2999 3000 if (ID == ".end_amdhsa_kernel") 3001 break; 3002 3003 if (Seen.find(ID) != Seen.end()) 3004 return TokError(".amdhsa_ directives cannot be repeated"); 3005 Seen.insert(ID); 3006 3007 SMLoc ValStart = getTok().getLoc(); 3008 int64_t IVal; 3009 if (getParser().parseAbsoluteExpression(IVal)) 3010 return true; 3011 SMLoc ValEnd = getTok().getLoc(); 3012 SMRange ValRange = SMRange(ValStart, ValEnd); 3013 3014 if (IVal < 0) 3015 return OutOfRangeError(ValRange); 3016 3017 uint64_t Val = IVal; 3018 3019 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3020 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3021 return OutOfRangeError(RANGE); \ 3022 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3023 3024 if (ID == ".amdhsa_group_segment_fixed_size") { 3025 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3026 return 
OutOfRangeError(ValRange); 3027 KD.group_segment_fixed_size = Val; 3028 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3029 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3030 return OutOfRangeError(ValRange); 3031 KD.private_segment_fixed_size = Val; 3032 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3033 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3034 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3035 Val, ValRange); 3036 UserSGPRCount += 4; 3037 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3038 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3039 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3040 ValRange); 3041 UserSGPRCount += 2; 3042 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3043 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3044 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3045 ValRange); 3046 UserSGPRCount += 2; 3047 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3048 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3049 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3050 Val, ValRange); 3051 UserSGPRCount += 2; 3052 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3053 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3054 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3055 ValRange); 3056 UserSGPRCount += 2; 3057 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3058 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3059 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3060 ValRange); 3061 UserSGPRCount += 2; 3062 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3063 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3064 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3065 Val, ValRange); 3066 UserSGPRCount += 1; 3067 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3068 PARSE_BITS_ENTRY( 3069 KD.compute_pgm_rsrc2, 3070 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3071 ValRange); 3072 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3073 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3074 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3075 ValRange); 3076 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3077 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3078 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3079 ValRange); 3080 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3081 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3082 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3083 ValRange); 3084 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3085 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3086 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3087 ValRange); 3088 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3089 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3090 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3091 ValRange); 3092 } else if (ID == ".amdhsa_next_free_vgpr") { 3093 VGPRRange = ValRange; 3094 NextFreeVGPR = Val; 3095 } else if (ID == ".amdhsa_next_free_sgpr") { 3096 SGPRRange = ValRange; 3097 NextFreeSGPR = Val; 3098 } else if (ID == ".amdhsa_reserve_vcc") { 3099 if (!isUInt<1>(Val)) 3100 return OutOfRangeError(ValRange); 3101 ReserveVCC = Val; 3102 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3103 if (IVersion.Major < 7) 3104 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3105 IDRange); 3106 if (!isUInt<1>(Val)) 3107 return OutOfRangeError(ValRange); 3108 ReserveFlatScr = Val; 3109 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3110 if 
(IVersion.Major < 8) 3111 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3112 IDRange); 3113 if (!isUInt<1>(Val)) 3114 return OutOfRangeError(ValRange); 3115 ReserveXNACK = Val; 3116 } else if (ID == ".amdhsa_float_round_mode_32") { 3117 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3118 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3119 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3120 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3121 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3122 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3123 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3124 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3125 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3126 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3127 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3128 ValRange); 3129 } else if (ID == ".amdhsa_dx10_clamp") { 3130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3131 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3132 } else if (ID == ".amdhsa_ieee_mode") { 3133 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3134 Val, ValRange); 3135 } else if (ID == ".amdhsa_fp16_overflow") { 3136 if (IVersion.Major < 9) 3137 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3138 IDRange); 3139 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3140 ValRange); 3141 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3142 PARSE_BITS_ENTRY( 3143 KD.compute_pgm_rsrc2, 3144 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3145 ValRange); 3146 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3149 Val, ValRange); 3150 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3151 PARSE_BITS_ENTRY( 3152 KD.compute_pgm_rsrc2, 3153 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3154 ValRange); 3155 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3156 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3157 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3158 Val, ValRange); 3159 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3160 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3161 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3162 Val, ValRange); 3163 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3164 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3165 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3166 Val, ValRange); 3167 } else if (ID == ".amdhsa_exception_int_div_zero") { 3168 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3169 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3170 Val, ValRange); 3171 } else { 3172 return getParser().Error(IDRange.Start, 3173 "unknown .amdhsa_kernel directive", IDRange); 3174 } 3175 3176 #undef PARSE_BITS_ENTRY 3177 } 3178 3179 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3180 return TokError(".amdhsa_next_free_vgpr directive is required"); 3181 3182 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3183 return TokError(".amdhsa_next_free_sgpr directive is required"); 3184 3185 unsigned VGPRBlocks; 3186 unsigned SGPRBlocks; 3187 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3188 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3189 SGPRRange, VGPRBlocks, SGPRBlocks)) 3190 return true; 3191 3192 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3193 VGPRBlocks)) 3194 return 
OutOfRangeError(VGPRRange); 3195 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3196 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3197 3198 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3199 SGPRBlocks)) 3200 return OutOfRangeError(SGPRRange); 3201 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3202 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3203 SGPRBlocks); 3204 3205 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3206 return TokError("too many user SGPRs enabled"); 3207 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3208 UserSGPRCount); 3209 3210 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3211 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3212 ReserveFlatScr, ReserveXNACK); 3213 return false; 3214 } 3215 3216 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3217 uint32_t Major; 3218 uint32_t Minor; 3219 3220 if (ParseDirectiveMajorMinor(Major, Minor)) 3221 return true; 3222 3223 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3224 return false; 3225 } 3226 3227 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3228 uint32_t Major; 3229 uint32_t Minor; 3230 uint32_t Stepping; 3231 StringRef VendorName; 3232 StringRef ArchName; 3233 3234 // If this directive has no arguments, then use the ISA version for the 3235 // targeted GPU. 3236 if (getLexer().is(AsmToken::EndOfStatement)) { 3237 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3238 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3239 ISA.Stepping, 3240 "AMD", "AMDGPU"); 3241 return false; 3242 } 3243 3244 if (ParseDirectiveMajorMinor(Major, Minor)) 3245 return true; 3246 3247 if (getLexer().isNot(AsmToken::Comma)) 3248 return TokError("stepping version number required, comma expected"); 3249 Lex(); 3250 3251 if (ParseAsAbsoluteExpression(Stepping)) 3252 return TokError("invalid stepping version"); 3253 3254 if (getLexer().isNot(AsmToken::Comma)) 3255 return TokError("vendor name required, comma expected"); 3256 Lex(); 3257 3258 if (getLexer().isNot(AsmToken::String)) 3259 return TokError("invalid vendor name"); 3260 3261 VendorName = getLexer().getTok().getStringContents(); 3262 Lex(); 3263 3264 if (getLexer().isNot(AsmToken::Comma)) 3265 return TokError("arch name required, comma expected"); 3266 Lex(); 3267 3268 if (getLexer().isNot(AsmToken::String)) 3269 return TokError("invalid arch name"); 3270 3271 ArchName = getLexer().getTok().getStringContents(); 3272 Lex(); 3273 3274 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3275 VendorName, ArchName); 3276 return false; 3277 } 3278 3279 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3280 amd_kernel_code_t &Header) { 3281 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3282 // assembly for backwards compatibility. 3283 if (ID == "max_scratch_backing_memory_byte_size") { 3284 Parser.eatToEndOfStatement(); 3285 return false; 3286 } 3287 3288 SmallString<40> ErrStr; 3289 raw_svector_ostream Err(ErrStr); 3290 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3291 return TokError(Err.str()); 3292 } 3293 Lex(); 3294 return false; 3295 } 3296 3297 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3298 amd_kernel_code_t Header; 3299 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3300 3301 while (true) { 3302 // Lex EndOfStatement. 
This is in a while loop, because lexing a comment 3303 // will set the current token to EndOfStatement. 3304 while(getLexer().is(AsmToken::EndOfStatement)) 3305 Lex(); 3306 3307 if (getLexer().isNot(AsmToken::Identifier)) 3308 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3309 3310 StringRef ID = getLexer().getTok().getIdentifier(); 3311 Lex(); 3312 3313 if (ID == ".end_amd_kernel_code_t") 3314 break; 3315 3316 if (ParseAMDKernelCodeTValue(ID, Header)) 3317 return true; 3318 } 3319 3320 getTargetStreamer().EmitAMDKernelCodeT(Header); 3321 3322 return false; 3323 } 3324 3325 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3326 if (getLexer().isNot(AsmToken::Identifier)) 3327 return TokError("expected symbol name"); 3328 3329 StringRef KernelName = Parser.getTok().getString(); 3330 3331 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3332 ELF::STT_AMDGPU_HSA_KERNEL); 3333 Lex(); 3334 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3335 KernelScope.initialize(getContext()); 3336 return false; 3337 } 3338 3339 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3340 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3341 return Error(getParser().getTok().getLoc(), 3342 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3343 "architectures"); 3344 } 3345 3346 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3347 3348 std::string ISAVersionStringFromSTI; 3349 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3350 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3351 3352 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3353 return Error(getParser().getTok().getLoc(), 3354 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3355 "arguments specified through the command line"); 3356 } 3357 3358 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3359 Lex(); 3360 3361 return false; 3362 } 3363 3364 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3365 const char *AssemblerDirectiveBegin; 3366 const char *AssemblerDirectiveEnd; 3367 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3368 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3369 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3370 HSAMD::V3::AssemblerDirectiveEnd) 3371 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3372 HSAMD::AssemblerDirectiveEnd); 3373 3374 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3375 return Error(getParser().getTok().getLoc(), 3376 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3377 "not available on non-amdhsa OSes")).str()); 3378 } 3379 3380 std::string HSAMetadataString; 3381 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3382 HSAMetadataString)) 3383 return true; 3384 3385 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3386 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3387 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3388 } else { 3389 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3390 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3391 } 3392 3393 return false; 3394 } 3395 3396 /// Common code to parse out a block of text (typically YAML) between start and 3397 /// end directives. 
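/// For example (illustrative only), for the code-object-v2 HSA metadata
/// directives this collects everything between
///   .amd_amdgpu_hsa_metadata
/// and
///   .end_amd_amdgpu_hsa_metadata
/// into CollectString, with the target's statement separator between lines.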
3398 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3399 const char *AssemblerDirectiveEnd, 3400 std::string &CollectString) { 3401 3402 raw_string_ostream CollectStream(CollectString); 3403 3404 getLexer().setSkipSpace(false); 3405 3406 bool FoundEnd = false; 3407 while (!getLexer().is(AsmToken::Eof)) { 3408 while (getLexer().is(AsmToken::Space)) { 3409 CollectStream << getLexer().getTok().getString(); 3410 Lex(); 3411 } 3412 3413 if (getLexer().is(AsmToken::Identifier)) { 3414 StringRef ID = getLexer().getTok().getIdentifier(); 3415 if (ID == AssemblerDirectiveEnd) { 3416 Lex(); 3417 FoundEnd = true; 3418 break; 3419 } 3420 } 3421 3422 CollectStream << Parser.parseStringToEndOfStatement() 3423 << getContext().getAsmInfo()->getSeparatorString(); 3424 3425 Parser.eatToEndOfStatement(); 3426 } 3427 3428 getLexer().setSkipSpace(true); 3429 3430 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3431 return TokError(Twine("expected directive ") + 3432 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3433 } 3434 3435 CollectStream.flush(); 3436 return false; 3437 } 3438 3439 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3440 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3441 std::string String; 3442 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3443 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3444 return true; 3445 3446 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3447 if (!PALMetadata->setFromString(String)) 3448 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3449 return false; 3450 } 3451 3452 /// Parse the assembler directive for old linear-format PAL metadata. 3453 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3454 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3455 return Error(getParser().getTok().getLoc(), 3456 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3457 "not available on non-amdpal OSes")).str()); 3458 } 3459 3460 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3461 PALMetadata->setLegacy(); 3462 for (;;) { 3463 uint32_t Key, Value; 3464 if (ParseAsAbsoluteExpression(Key)) { 3465 return TokError(Twine("invalid value in ") + 3466 Twine(PALMD::AssemblerDirective)); 3467 } 3468 if (getLexer().isNot(AsmToken::Comma)) { 3469 return TokError(Twine("expected an even number of values in ") + 3470 Twine(PALMD::AssemblerDirective)); 3471 } 3472 Lex(); 3473 if (ParseAsAbsoluteExpression(Value)) { 3474 return TokError(Twine("invalid value in ") + 3475 Twine(PALMD::AssemblerDirective)); 3476 } 3477 PALMetadata->setRegister(Key, Value); 3478 if (getLexer().isNot(AsmToken::Comma)) 3479 break; 3480 Lex(); 3481 } 3482 return false; 3483 } 3484 3485 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3486 StringRef IDVal = DirectiveID.getString(); 3487 3488 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3489 if (IDVal == ".amdgcn_target") 3490 return ParseDirectiveAMDGCNTarget(); 3491 3492 if (IDVal == ".amdhsa_kernel") 3493 return ParseDirectiveAMDHSAKernel(); 3494 3495 // TODO: Restructure/combine with PAL metadata directive. 
3496 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3497 return ParseDirectiveHSAMetadata(); 3498 } else { 3499 if (IDVal == ".hsa_code_object_version") 3500 return ParseDirectiveHSACodeObjectVersion(); 3501 3502 if (IDVal == ".hsa_code_object_isa") 3503 return ParseDirectiveHSACodeObjectISA(); 3504 3505 if (IDVal == ".amd_kernel_code_t") 3506 return ParseDirectiveAMDKernelCodeT(); 3507 3508 if (IDVal == ".amdgpu_hsa_kernel") 3509 return ParseDirectiveAMDGPUHsaKernel(); 3510 3511 if (IDVal == ".amd_amdgpu_isa") 3512 return ParseDirectiveISAVersion(); 3513 3514 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3515 return ParseDirectiveHSAMetadata(); 3516 } 3517 3518 if (IDVal == PALMD::AssemblerDirectiveBegin) 3519 return ParseDirectivePALMetadataBegin(); 3520 3521 if (IDVal == PALMD::AssemblerDirective) 3522 return ParseDirectivePALMetadata(); 3523 3524 return true; 3525 } 3526 3527 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3528 unsigned RegNo) const { 3529 3530 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3531 R.isValid(); ++R) { 3532 if (*R == RegNo) 3533 return isGFX9() || isGFX10(); 3534 } 3535 3536 // GFX10 has 2 more SGPRs 104 and 105. 3537 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 3538 R.isValid(); ++R) { 3539 if (*R == RegNo) 3540 return hasSGPR104_SGPR105(); 3541 } 3542 3543 switch (RegNo) { 3544 case AMDGPU::TBA: 3545 case AMDGPU::TBA_LO: 3546 case AMDGPU::TBA_HI: 3547 case AMDGPU::TMA: 3548 case AMDGPU::TMA_LO: 3549 case AMDGPU::TMA_HI: 3550 return !isGFX9() && !isGFX10(); 3551 case AMDGPU::XNACK_MASK: 3552 case AMDGPU::XNACK_MASK_LO: 3553 case AMDGPU::XNACK_MASK_HI: 3554 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 3555 case AMDGPU::SGPR_NULL: 3556 return isGFX10(); 3557 default: 3558 break; 3559 } 3560 3561 if (isInlineValue(RegNo)) 3562 return !isCI() && !isSI() && !isVI(); 3563 3564 if (isCI()) 3565 return true; 3566 3567 if (isSI() || isGFX10()) { 3568 // No flat_scr on SI. 3569 // On GFX10 flat scratch is not a valid register operand and can only be 3570 // accessed with s_setreg/s_getreg. 3571 switch (RegNo) { 3572 case AMDGPU::FLAT_SCR: 3573 case AMDGPU::FLAT_SCR_LO: 3574 case AMDGPU::FLAT_SCR_HI: 3575 return false; 3576 default: 3577 return true; 3578 } 3579 } 3580 3581 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3582 // SI/CI have. 3583 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3584 R.isValid(); ++R) { 3585 if (*R == RegNo) 3586 return hasSGPR102_SGPR103(); 3587 } 3588 3589 return true; 3590 } 3591 3592 OperandMatchResultTy 3593 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3594 // Try to parse with a custom parser 3595 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3596 3597 // If we successfully parsed the operand or if there was an error parsing, 3598 // we are done. 3599 // 3600 // If we are parsing after we reach EndOfStatement then this means we 3601 // are appending default values to the Operands list. This is only done 3602 // by custom parser, so we shouldn't continue on to the generic parsing.
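// Generic fallback order below (when no custom parser matched): try a register
// or immediate first, then a general expression, and finally keep a bare
// identifier as a token so flags like 'gds' still match.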
3603 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3604 getLexer().is(AsmToken::EndOfStatement)) 3605 return ResTy; 3606 3607 ResTy = parseRegOrImm(Operands); 3608 3609 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) 3610 return ResTy; 3611 3612 const auto &Tok = Parser.getTok(); 3613 SMLoc S = Tok.getLoc(); 3614 3615 const MCExpr *Expr = nullptr; 3616 if (!Parser.parseExpression(Expr)) { 3617 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3618 return MatchOperand_Success; 3619 } 3620 3621 // Possibly this is an instruction flag like 'gds'. 3622 if (Tok.getKind() == AsmToken::Identifier) { 3623 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3624 Parser.Lex(); 3625 return MatchOperand_Success; 3626 } 3627 3628 return MatchOperand_NoMatch; 3629 } 3630 3631 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3632 // Clear any forced encodings from the previous instruction. 3633 setForcedEncodingSize(0); 3634 setForcedDPP(false); 3635 setForcedSDWA(false); 3636 3637 if (Name.endswith("_e64")) { 3638 setForcedEncodingSize(64); 3639 return Name.substr(0, Name.size() - 4); 3640 } else if (Name.endswith("_e32")) { 3641 setForcedEncodingSize(32); 3642 return Name.substr(0, Name.size() - 4); 3643 } else if (Name.endswith("_dpp")) { 3644 setForcedDPP(true); 3645 return Name.substr(0, Name.size() - 4); 3646 } else if (Name.endswith("_sdwa")) { 3647 setForcedSDWA(true); 3648 return Name.substr(0, Name.size() - 5); 3649 } 3650 return Name; 3651 } 3652 3653 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3654 StringRef Name, 3655 SMLoc NameLoc, OperandVector &Operands) { 3656 // Add the instruction mnemonic 3657 Name = parseMnemonicSuffix(Name); 3658 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3659 3660 while (!getLexer().is(AsmToken::EndOfStatement)) { 3661 OperandMatchResultTy Res = parseOperand(Operands, Name); 3662 3663 // Eat the comma or space if there is one. 
3664 if (getLexer().is(AsmToken::Comma)) 3665 Parser.Lex(); 3666 3667 switch (Res) { 3668 case MatchOperand_Success: break; 3669 case MatchOperand_ParseFail: 3670 Error(getLexer().getLoc(), "failed parsing operand."); 3671 while (!getLexer().is(AsmToken::EndOfStatement)) { 3672 Parser.Lex(); 3673 } 3674 return true; 3675 case MatchOperand_NoMatch: 3676 Error(getLexer().getLoc(), "not a valid operand."); 3677 while (!getLexer().is(AsmToken::EndOfStatement)) { 3678 Parser.Lex(); 3679 } 3680 return true; 3681 } 3682 } 3683 3684 return false; 3685 } 3686 3687 //===----------------------------------------------------------------------===// 3688 // Utility functions 3689 //===----------------------------------------------------------------------===// 3690 3691 OperandMatchResultTy 3692 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3693 switch(getLexer().getKind()) { 3694 default: return MatchOperand_NoMatch; 3695 case AsmToken::Identifier: { 3696 StringRef Name = Parser.getTok().getString(); 3697 if (!Name.equals(Prefix)) { 3698 return MatchOperand_NoMatch; 3699 } 3700 3701 Parser.Lex(); 3702 if (getLexer().isNot(AsmToken::Colon)) 3703 return MatchOperand_ParseFail; 3704 3705 Parser.Lex(); 3706 3707 bool IsMinus = false; 3708 if (getLexer().getKind() == AsmToken::Minus) { 3709 Parser.Lex(); 3710 IsMinus = true; 3711 } 3712 3713 if (getLexer().isNot(AsmToken::Integer)) 3714 return MatchOperand_ParseFail; 3715 3716 if (getParser().parseAbsoluteExpression(Int)) 3717 return MatchOperand_ParseFail; 3718 3719 if (IsMinus) 3720 Int = -Int; 3721 break; 3722 } 3723 } 3724 return MatchOperand_Success; 3725 } 3726 3727 OperandMatchResultTy 3728 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3729 AMDGPUOperand::ImmTy ImmTy, 3730 bool (*ConvertResult)(int64_t&)) { 3731 SMLoc S = Parser.getTok().getLoc(); 3732 int64_t Value = 0; 3733 3734 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3735 if (Res != MatchOperand_Success) 3736 return Res; 3737 3738 if (ConvertResult && !ConvertResult(Value)) { 3739 return MatchOperand_ParseFail; 3740 } 3741 3742 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3743 return MatchOperand_Success; 3744 } 3745 3746 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3747 const char *Prefix, 3748 OperandVector &Operands, 3749 AMDGPUOperand::ImmTy ImmTy, 3750 bool (*ConvertResult)(int64_t&)) { 3751 StringRef Name = Parser.getTok().getString(); 3752 if (!Name.equals(Prefix)) 3753 return MatchOperand_NoMatch; 3754 3755 Parser.Lex(); 3756 if (getLexer().isNot(AsmToken::Colon)) 3757 return MatchOperand_ParseFail; 3758 3759 Parser.Lex(); 3760 if (getLexer().isNot(AsmToken::LBrac)) 3761 return MatchOperand_ParseFail; 3762 Parser.Lex(); 3763 3764 unsigned Val = 0; 3765 SMLoc S = Parser.getTok().getLoc(); 3766 3767 // FIXME: How to verify the number of elements matches the number of src 3768 // operands? 
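// Example of the syntax handled below (values illustrative): "op_sel:[0,1,1,0]"
// parses up to four 0/1 elements, packing element I into bit I of Val, so this
// input yields Val = 0b0110.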
3769 for (int I = 0; I < 4; ++I) { 3770 if (I != 0) { 3771 if (getLexer().is(AsmToken::RBrac)) 3772 break; 3773 3774 if (getLexer().isNot(AsmToken::Comma)) 3775 return MatchOperand_ParseFail; 3776 Parser.Lex(); 3777 } 3778 3779 if (getLexer().isNot(AsmToken::Integer)) 3780 return MatchOperand_ParseFail; 3781 3782 int64_t Op; 3783 if (getParser().parseAbsoluteExpression(Op)) 3784 return MatchOperand_ParseFail; 3785 3786 if (Op != 0 && Op != 1) 3787 return MatchOperand_ParseFail; 3788 Val |= (Op << I); 3789 } 3790 3791 Parser.Lex(); 3792 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3793 return MatchOperand_Success; 3794 } 3795 3796 OperandMatchResultTy 3797 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3798 AMDGPUOperand::ImmTy ImmTy) { 3799 int64_t Bit = 0; 3800 SMLoc S = Parser.getTok().getLoc(); 3801 3802 // We are at the end of the statement, and this is a default argument, so 3803 // use a default value. 3804 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3805 switch(getLexer().getKind()) { 3806 case AsmToken::Identifier: { 3807 StringRef Tok = Parser.getTok().getString(); 3808 if (Tok == Name) { 3809 if (Tok == "r128" && isGFX9()) 3810 Error(S, "r128 modifier is not supported on this GPU"); 3811 if (Tok == "a16" && !isGFX9()) 3812 Error(S, "a16 modifier is not supported on this GPU"); 3813 Bit = 1; 3814 Parser.Lex(); 3815 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3816 Bit = 0; 3817 Parser.Lex(); 3818 } else { 3819 return MatchOperand_NoMatch; 3820 } 3821 break; 3822 } 3823 default: 3824 return MatchOperand_NoMatch; 3825 } 3826 } 3827 3828 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3829 return MatchOperand_Success; 3830 } 3831 3832 static void addOptionalImmOperand( 3833 MCInst& Inst, const OperandVector& Operands, 3834 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3835 AMDGPUOperand::ImmTy ImmT, 3836 int64_t Default = 0) { 3837 auto i = OptionalIdx.find(ImmT); 3838 if (i != OptionalIdx.end()) { 3839 unsigned Idx = i->second; 3840 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3841 } else { 3842 Inst.addOperand(MCOperand::createImm(Default)); 3843 } 3844 } 3845 3846 OperandMatchResultTy 3847 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3848 if (getLexer().isNot(AsmToken::Identifier)) { 3849 return MatchOperand_NoMatch; 3850 } 3851 StringRef Tok = Parser.getTok().getString(); 3852 if (Tok != Prefix) { 3853 return MatchOperand_NoMatch; 3854 } 3855 3856 Parser.Lex(); 3857 if (getLexer().isNot(AsmToken::Colon)) { 3858 return MatchOperand_ParseFail; 3859 } 3860 3861 Parser.Lex(); 3862 if (getLexer().isNot(AsmToken::Identifier)) { 3863 return MatchOperand_ParseFail; 3864 } 3865 3866 Value = Parser.getTok().getString(); 3867 return MatchOperand_Success; 3868 } 3869 3870 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3871 // values to live in a joint format operand in the MCInst encoding. 3872 OperandMatchResultTy 3873 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3874 SMLoc S = Parser.getTok().getLoc(); 3875 int64_t Dfmt = 0, Nfmt = 0; 3876 // dfmt and nfmt can appear in either order, and each is optional. 
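// For example (values illustrative), "dfmt:15, nfmt:2" and "nfmt:2, dfmt:15"
// are both accepted and end up encoded as Format = Dfmt | (Nfmt << 4) = 0x2f;
// dfmt must be below 16 and nfmt below 8.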
3877 bool GotDfmt = false, GotNfmt = false; 3878 while (!GotDfmt || !GotNfmt) { 3879 if (!GotDfmt) { 3880 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3881 if (Res != MatchOperand_NoMatch) { 3882 if (Res != MatchOperand_Success) 3883 return Res; 3884 if (Dfmt >= 16) { 3885 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3886 return MatchOperand_ParseFail; 3887 } 3888 GotDfmt = true; 3889 Parser.Lex(); 3890 continue; 3891 } 3892 } 3893 if (!GotNfmt) { 3894 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3895 if (Res != MatchOperand_NoMatch) { 3896 if (Res != MatchOperand_Success) 3897 return Res; 3898 if (Nfmt >= 8) { 3899 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3900 return MatchOperand_ParseFail; 3901 } 3902 GotNfmt = true; 3903 Parser.Lex(); 3904 continue; 3905 } 3906 } 3907 break; 3908 } 3909 if (!GotDfmt && !GotNfmt) 3910 return MatchOperand_NoMatch; 3911 auto Format = Dfmt | Nfmt << 4; 3912 Operands.push_back( 3913 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3914 return MatchOperand_Success; 3915 } 3916 3917 //===----------------------------------------------------------------------===// 3918 // ds 3919 //===----------------------------------------------------------------------===// 3920 3921 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3922 const OperandVector &Operands) { 3923 OptionalImmIndexMap OptionalIdx; 3924 3925 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3926 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3927 3928 // Add the register arguments 3929 if (Op.isReg()) { 3930 Op.addRegOperands(Inst, 1); 3931 continue; 3932 } 3933 3934 // Handle optional arguments 3935 OptionalIdx[Op.getImmTy()] = i; 3936 } 3937 3938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3941 3942 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3943 } 3944 3945 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3946 bool IsGdsHardcoded) { 3947 OptionalImmIndexMap OptionalIdx; 3948 3949 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3950 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3951 3952 // Add the register arguments 3953 if (Op.isReg()) { 3954 Op.addRegOperands(Inst, 1); 3955 continue; 3956 } 3957 3958 if (Op.isToken() && Op.getToken() == "gds") { 3959 IsGdsHardcoded = true; 3960 continue; 3961 } 3962 3963 // Handle optional arguments 3964 OptionalIdx[Op.getImmTy()] = i; 3965 } 3966 3967 AMDGPUOperand::ImmTy OffsetType = 3968 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3969 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 3970 AMDGPUOperand::ImmTyOffset; 3971 3972 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3973 3974 if (!IsGdsHardcoded) { 3975 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3976 } 3977 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3978 } 3979 3980 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3981 OptionalImmIndexMap OptionalIdx; 3982 3983 unsigned OperandIdx[4]; 3984 unsigned EnMask = 0; 3985 int SrcIdx = 0; 3986 3987 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3988 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3989 3990 // Add the register arguments 3991 if (Op.isReg()) { 3992 assert(SrcIdx < 4); 3993 OperandIdx[SrcIdx] = Inst.size(); 3994 Op.addRegOperands(Inst, 1); 3995 ++SrcIdx; 3996 continue; 3997 } 3998 3999 if (Op.isOff()) { 4000 assert(SrcIdx < 4); 4001 OperandIdx[SrcIdx] = Inst.size(); 4002 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4003 ++SrcIdx; 4004 continue; 4005 } 4006 4007 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4008 Op.addImmOperands(Inst, 1); 4009 continue; 4010 } 4011 4012 if (Op.isToken() && Op.getToken() == "done") 4013 continue; 4014 4015 // Handle optional arguments 4016 OptionalIdx[Op.getImmTy()] = i; 4017 } 4018 4019 assert(SrcIdx == 4); 4020 4021 bool Compr = false; 4022 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4023 Compr = true; 4024 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4025 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4026 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4027 } 4028 4029 for (auto i = 0; i < SrcIdx; ++i) { 4030 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4031 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4032 } 4033 } 4034 4035 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4036 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4037 4038 Inst.addOperand(MCOperand::createImm(EnMask)); 4039 } 4040 4041 //===----------------------------------------------------------------------===// 4042 // s_waitcnt 4043 //===----------------------------------------------------------------------===// 4044 4045 static bool 4046 encodeCnt( 4047 const AMDGPU::IsaVersion ISA, 4048 int64_t &IntVal, 4049 int64_t CntVal, 4050 bool Saturate, 4051 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4052 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4053 { 4054 bool Failed = false; 4055 4056 IntVal = encode(ISA, IntVal, CntVal); 4057 if (CntVal != decode(ISA, IntVal)) { 4058 if (Saturate) { 4059 IntVal = encode(ISA, IntVal, -1); 4060 } else { 4061 Failed = true; 4062 } 4063 } 4064 return Failed; 4065 } 4066 4067 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4068 StringRef CntName = Parser.getTok().getString(); 4069 int64_t CntVal; 4070 4071 Parser.Lex(); 4072 if (getLexer().isNot(AsmToken::LParen)) 4073 return true; 4074 4075 Parser.Lex(); 4076 if (getLexer().isNot(AsmToken::Integer)) 4077 return true; 4078 4079 SMLoc ValLoc = Parser.getTok().getLoc(); 4080 if (getParser().parseAbsoluteExpression(CntVal)) 4081 return true; 4082 4083 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4084 4085 bool Failed = true; 4086 bool Sat = CntName.endswith("_sat"); 4087 4088 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4089 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4090 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4091 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4092 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4093 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4094 } 4095 4096 if (Failed) { 4097 Error(ValLoc, "too large value for " + CntName); 4098 return true; 4099 } 4100 4101 if (getLexer().isNot(AsmToken::RParen)) { 4102 return true; 4103 } 4104 4105 Parser.Lex(); 4106 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 4107 const AsmToken NextToken = getLexer().peekTok(); 4108 if (NextToken.is(AsmToken::Identifier)) { 4109 Parser.Lex(); 4110 } 4111 } 4112 4113 return false; 4114 } 4115 4116 OperandMatchResultTy 4117 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4118 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4119 int64_t Waitcnt = getWaitcntBitMask(ISA); 4120 SMLoc S = Parser.getTok().getLoc(); 4121 4122 switch(getLexer().getKind()) { 4123 default: return MatchOperand_ParseFail; 4124 case AsmToken::Integer: 4125 // The operand can be an integer value. 
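// e.g. "s_waitcnt 0" (or any absolute expression) supplies the already-encoded
// wait-count mask directly instead of the named vmcnt()/expcnt()/lgkmcnt() forms.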
4126 if (getParser().parseAbsoluteExpression(Waitcnt)) 4127 return MatchOperand_ParseFail; 4128 break; 4129 4130 case AsmToken::Identifier: 4131 do { 4132 if (parseCnt(Waitcnt)) 4133 return MatchOperand_ParseFail; 4134 } while(getLexer().isNot(AsmToken::EndOfStatement)); 4135 break; 4136 } 4137 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4138 return MatchOperand_Success; 4139 } 4140 4141 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 4142 int64_t &Width) { 4143 using namespace llvm::AMDGPU::Hwreg; 4144 4145 if (Parser.getTok().getString() != "hwreg") 4146 return true; 4147 Parser.Lex(); 4148 4149 if (getLexer().isNot(AsmToken::LParen)) 4150 return true; 4151 Parser.Lex(); 4152 4153 if (getLexer().is(AsmToken::Identifier)) { 4154 HwReg.IsSymbolic = true; 4155 HwReg.Id = ID_UNKNOWN_; 4156 const StringRef tok = Parser.getTok().getString(); 4157 int Last = ID_SYMBOLIC_LAST_; 4158 if (isSI() || isCI() || isVI()) 4159 Last = ID_SYMBOLIC_FIRST_GFX9_; 4160 else if (isGFX9()) 4161 Last = ID_SYMBOLIC_FIRST_GFX10_; 4162 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 4163 if (tok == IdSymbolic[i]) { 4164 HwReg.Id = i; 4165 break; 4166 } 4167 } 4168 Parser.Lex(); 4169 } else { 4170 HwReg.IsSymbolic = false; 4171 if (getLexer().isNot(AsmToken::Integer)) 4172 return true; 4173 if (getParser().parseAbsoluteExpression(HwReg.Id)) 4174 return true; 4175 } 4176 4177 if (getLexer().is(AsmToken::RParen)) { 4178 Parser.Lex(); 4179 return false; 4180 } 4181 4182 // optional params 4183 if (getLexer().isNot(AsmToken::Comma)) 4184 return true; 4185 Parser.Lex(); 4186 4187 if (getLexer().isNot(AsmToken::Integer)) 4188 return true; 4189 if (getParser().parseAbsoluteExpression(Offset)) 4190 return true; 4191 4192 if (getLexer().isNot(AsmToken::Comma)) 4193 return true; 4194 Parser.Lex(); 4195 4196 if (getLexer().isNot(AsmToken::Integer)) 4197 return true; 4198 if (getParser().parseAbsoluteExpression(Width)) 4199 return true; 4200 4201 if (getLexer().isNot(AsmToken::RParen)) 4202 return true; 4203 Parser.Lex(); 4204 4205 return false; 4206 } 4207 4208 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4209 using namespace llvm::AMDGPU::Hwreg; 4210 4211 int64_t Imm16Val = 0; 4212 SMLoc S = Parser.getTok().getLoc(); 4213 4214 switch(getLexer().getKind()) { 4215 default: return MatchOperand_NoMatch; 4216 case AsmToken::Integer: 4217 // The operand can be an integer value. 4218 if (getParser().parseAbsoluteExpression(Imm16Val)) 4219 return MatchOperand_NoMatch; 4220 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4221 Error(S, "invalid immediate: only 16-bit values are legal"); 4222 // Do not return error code, but create an imm operand anyway and proceed 4223 // to the next operand, if any. That avoids unnecessary error messages.
4224 } 4225 break; 4226 4227 case AsmToken::Identifier: { 4228 OperandInfoTy HwReg(ID_UNKNOWN_); 4229 int64_t Offset = OFFSET_DEFAULT_; 4230 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4231 if (parseHwregConstruct(HwReg, Offset, Width)) 4232 return MatchOperand_ParseFail; 4233 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4234 if (HwReg.IsSymbolic) 4235 Error(S, "invalid symbolic name of hardware register"); 4236 else 4237 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4238 } 4239 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4240 Error(S, "invalid bit offset: only 5-bit values are legal"); 4241 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4242 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4243 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4244 } 4245 break; 4246 } 4247 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4248 return MatchOperand_Success; 4249 } 4250 4251 bool AMDGPUOperand::isSWaitCnt() const { 4252 return isImm(); 4253 } 4254 4255 bool AMDGPUOperand::isHwreg() const { 4256 return isImmTy(ImmTyHwreg); 4257 } 4258 4259 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4260 using namespace llvm::AMDGPU::SendMsg; 4261 4262 if (Parser.getTok().getString() != "sendmsg") 4263 return true; 4264 Parser.Lex(); 4265 4266 if (getLexer().isNot(AsmToken::LParen)) 4267 return true; 4268 Parser.Lex(); 4269 4270 if (getLexer().is(AsmToken::Identifier)) { 4271 Msg.IsSymbolic = true; 4272 Msg.Id = ID_UNKNOWN_; 4273 const std::string tok = Parser.getTok().getString(); 4274 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4275 switch(i) { 4276 default: continue; // Omit gaps. 4277 case ID_GS_ALLOC_REQ: 4278 if (isSI() || isCI() || isVI()) 4279 continue; 4280 break; 4281 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: 4282 case ID_SYSMSG: break; 4283 } 4284 if (tok == IdSymbolic[i]) { 4285 Msg.Id = i; 4286 break; 4287 } 4288 } 4289 Parser.Lex(); 4290 } else { 4291 Msg.IsSymbolic = false; 4292 if (getLexer().isNot(AsmToken::Integer)) 4293 return true; 4294 if (getParser().parseAbsoluteExpression(Msg.Id)) 4295 return true; 4296 if (getLexer().is(AsmToken::Integer)) 4297 if (getParser().parseAbsoluteExpression(Msg.Id)) 4298 Msg.Id = ID_UNKNOWN_; 4299 } 4300 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4301 return false; 4302 4303 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4304 if (getLexer().isNot(AsmToken::RParen)) 4305 return true; 4306 Parser.Lex(); 4307 return false; 4308 } 4309 4310 if (getLexer().isNot(AsmToken::Comma)) 4311 return true; 4312 Parser.Lex(); 4313 4314 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4315 Operation.Id = ID_UNKNOWN_; 4316 if (getLexer().is(AsmToken::Identifier)) { 4317 Operation.IsSymbolic = true; 4318 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4319 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4320 const int L = (Msg.Id == ID_SYSMSG) ? 
OP_SYS_LAST_ : OP_GS_LAST_; 4321 const StringRef Tok = Parser.getTok().getString(); 4322 for (int i = F; i < L; ++i) { 4323 if (Tok == S[i]) { 4324 Operation.Id = i; 4325 break; 4326 } 4327 } 4328 Parser.Lex(); 4329 } else { 4330 Operation.IsSymbolic = false; 4331 if (getLexer().isNot(AsmToken::Integer)) 4332 return true; 4333 if (getParser().parseAbsoluteExpression(Operation.Id)) 4334 return true; 4335 } 4336 4337 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4338 // Stream id is optional. 4339 if (getLexer().is(AsmToken::RParen)) { 4340 Parser.Lex(); 4341 return false; 4342 } 4343 4344 if (getLexer().isNot(AsmToken::Comma)) 4345 return true; 4346 Parser.Lex(); 4347 4348 if (getLexer().isNot(AsmToken::Integer)) 4349 return true; 4350 if (getParser().parseAbsoluteExpression(StreamId)) 4351 return true; 4352 } 4353 4354 if (getLexer().isNot(AsmToken::RParen)) 4355 return true; 4356 Parser.Lex(); 4357 return false; 4358 } 4359 4360 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4361 if (getLexer().getKind() != AsmToken::Identifier) 4362 return MatchOperand_NoMatch; 4363 4364 StringRef Str = Parser.getTok().getString(); 4365 int Slot = StringSwitch<int>(Str) 4366 .Case("p10", 0) 4367 .Case("p20", 1) 4368 .Case("p0", 2) 4369 .Default(-1); 4370 4371 SMLoc S = Parser.getTok().getLoc(); 4372 if (Slot == -1) 4373 return MatchOperand_ParseFail; 4374 4375 Parser.Lex(); 4376 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4377 AMDGPUOperand::ImmTyInterpSlot)); 4378 return MatchOperand_Success; 4379 } 4380 4381 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4382 if (getLexer().getKind() != AsmToken::Identifier) 4383 return MatchOperand_NoMatch; 4384 4385 StringRef Str = Parser.getTok().getString(); 4386 if (!Str.startswith("attr")) 4387 return MatchOperand_NoMatch; 4388 4389 StringRef Chan = Str.take_back(2); 4390 int AttrChan = StringSwitch<int>(Chan) 4391 .Case(".x", 0) 4392 .Case(".y", 1) 4393 .Case(".z", 2) 4394 .Case(".w", 3) 4395 .Default(-1); 4396 if (AttrChan == -1) 4397 return MatchOperand_ParseFail; 4398 4399 Str = Str.drop_back(2).drop_front(4); 4400 4401 uint8_t Attr; 4402 if (Str.getAsInteger(10, Attr)) 4403 return MatchOperand_ParseFail; 4404 4405 SMLoc S = Parser.getTok().getLoc(); 4406 Parser.Lex(); 4407 if (Attr > 63) { 4408 Error(S, "out of bounds attr"); 4409 return MatchOperand_Success; 4410 } 4411 4412 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4413 4414 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4415 AMDGPUOperand::ImmTyInterpAttr)); 4416 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4417 AMDGPUOperand::ImmTyAttrChan)); 4418 return MatchOperand_Success; 4419 } 4420 4421 void AMDGPUAsmParser::errorExpTgt() { 4422 Error(Parser.getTok().getLoc(), "invalid exp target"); 4423 } 4424 4425 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4426 uint8_t &Val) { 4427 if (Str == "null") { 4428 Val = 9; 4429 return MatchOperand_Success; 4430 } 4431 4432 if (Str.startswith("mrt")) { 4433 Str = Str.drop_front(3); 4434 if (Str == "z") { // == mrtz 4435 Val = 8; 4436 return MatchOperand_Success; 4437 } 4438 4439 if (Str.getAsInteger(10, Val)) 4440 return MatchOperand_ParseFail; 4441 4442 if (Val > 7) 4443 errorExpTgt(); 4444 4445 return MatchOperand_Success; 4446 } 4447 4448 if (Str.startswith("pos")) { 4449 Str = Str.drop_front(3); 4450 if (Str.getAsInteger(10, Val)) 4451 return MatchOperand_ParseFail; 4452 4453 if (Val > 
3) 4454 errorExpTgt(); 4455 4456 Val += 12; 4457 return MatchOperand_Success; 4458 } 4459 4460 if (Str.startswith("param")) { 4461 Str = Str.drop_front(5); 4462 if (Str.getAsInteger(10, Val)) 4463 return MatchOperand_ParseFail; 4464 4465 if (Val >= 32) 4466 errorExpTgt(); 4467 4468 Val += 32; 4469 return MatchOperand_Success; 4470 } 4471 4472 if (Str.startswith("invalid_target_")) { 4473 Str = Str.drop_front(15); 4474 if (Str.getAsInteger(10, Val)) 4475 return MatchOperand_ParseFail; 4476 4477 errorExpTgt(); 4478 return MatchOperand_Success; 4479 } 4480 4481 return MatchOperand_NoMatch; 4482 } 4483 4484 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4485 uint8_t Val; 4486 StringRef Str = Parser.getTok().getString(); 4487 4488 auto Res = parseExpTgtImpl(Str, Val); 4489 if (Res != MatchOperand_Success) 4490 return Res; 4491 4492 SMLoc S = Parser.getTok().getLoc(); 4493 Parser.Lex(); 4494 4495 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4496 AMDGPUOperand::ImmTyExpTgt)); 4497 return MatchOperand_Success; 4498 } 4499 4500 OperandMatchResultTy 4501 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4502 using namespace llvm::AMDGPU::SendMsg; 4503 4504 int64_t Imm16Val = 0; 4505 SMLoc S = Parser.getTok().getLoc(); 4506 4507 switch(getLexer().getKind()) { 4508 default: 4509 return MatchOperand_NoMatch; 4510 case AsmToken::Integer: 4511 // The operand can be an integer value. 4512 if (getParser().parseAbsoluteExpression(Imm16Val)) 4513 return MatchOperand_NoMatch; 4514 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4515 Error(S, "invalid immediate: only 16-bit values are legal"); 4516 // Do not return error code, but create an imm operand anyway and proceed 4517 // to the next operand, if any. That avoids unnecessary error messages. 4518 } 4519 break; 4520 case AsmToken::Identifier: { 4521 OperandInfoTy Msg(ID_UNKNOWN_); 4522 OperandInfoTy Operation(OP_UNKNOWN_); 4523 int64_t StreamId = STREAM_ID_DEFAULT_; 4524 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4525 return MatchOperand_ParseFail; 4526 do { 4527 // Validate and encode message ID. 4528 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4529 || (Msg.Id == ID_GS_ALLOC_REQ && !isSI() && !isCI() && !isVI()) 4530 || Msg.Id == ID_SYSMSG)) { 4531 if (Msg.IsSymbolic) 4532 Error(S, "invalid/unsupported symbolic name of message"); 4533 else 4534 Error(S, "invalid/unsupported code of message"); 4535 break; 4536 } 4537 Imm16Val = (Msg.Id << ID_SHIFT_); 4538 // Validate and encode operation ID. 4539 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4540 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4541 if (Operation.IsSymbolic) 4542 Error(S, "invalid symbolic name of GS_OP"); 4543 else 4544 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4545 break; 4546 } 4547 if (Operation.Id == OP_GS_NOP 4548 && Msg.Id != ID_GS_DONE) { 4549 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4550 break; 4551 } 4552 Imm16Val |= (Operation.Id << OP_SHIFT_); 4553 } 4554 if (Msg.Id == ID_SYSMSG) { 4555 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4556 if (Operation.IsSymbolic) 4557 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4558 else 4559 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4560 break; 4561 } 4562 Imm16Val |= (Operation.Id << OP_SHIFT_); 4563 } 4564 // Validate and encode stream ID. 4565 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4566 if (!
(STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4567 Error(S, "invalid stream id: only 2-bit values are legal"); 4568 break; 4569 } 4570 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4571 } 4572 } while (false); 4573 } 4574 break; 4575 } 4576 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4577 return MatchOperand_Success; 4578 } 4579 4580 bool AMDGPUOperand::isSendMsg() const { 4581 return isImmTy(ImmTySendMsg); 4582 } 4583 4584 //===----------------------------------------------------------------------===// 4585 // parser helpers 4586 //===----------------------------------------------------------------------===// 4587 4588 bool 4589 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 4590 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 4591 } 4592 4593 bool 4594 AMDGPUAsmParser::isId(const StringRef Id) const { 4595 return isId(getToken(), Id); 4596 } 4597 4598 bool 4599 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 4600 return getTokenKind() == Kind; 4601 } 4602 4603 bool 4604 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4605 if (isId(Id)) { 4606 lex(); 4607 return true; 4608 } 4609 return false; 4610 } 4611 4612 bool 4613 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4614 if (isToken(Kind)) { 4615 lex(); 4616 return true; 4617 } 4618 return false; 4619 } 4620 4621 bool 4622 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4623 const StringRef ErrMsg) { 4624 if (!trySkipToken(Kind)) { 4625 Error(getLoc(), ErrMsg); 4626 return false; 4627 } 4628 return true; 4629 } 4630 4631 bool 4632 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4633 return !getParser().parseAbsoluteExpression(Imm); 4634 } 4635 4636 bool 4637 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4638 if (isToken(AsmToken::String)) { 4639 Val = getToken().getStringContents(); 4640 lex(); 4641 return true; 4642 } else { 4643 Error(getLoc(), ErrMsg); 4644 return false; 4645 } 4646 } 4647 4648 AsmToken 4649 AMDGPUAsmParser::getToken() const { 4650 return Parser.getTok(); 4651 } 4652 4653 AsmToken 4654 AMDGPUAsmParser::peekToken() { 4655 return getLexer().peekTok(); 4656 } 4657 4658 void 4659 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 4660 auto TokCount = getLexer().peekTokens(Tokens); 4661 4662 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 4663 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 4664 } 4665 4666 AsmToken::TokenKind 4667 AMDGPUAsmParser::getTokenKind() const { 4668 return getLexer().getKind(); 4669 } 4670 4671 SMLoc 4672 AMDGPUAsmParser::getLoc() const { 4673 return getToken().getLoc(); 4674 } 4675 4676 StringRef 4677 AMDGPUAsmParser::getTokenStr() const { 4678 return getToken().getString(); 4679 } 4680 4681 void 4682 AMDGPUAsmParser::lex() { 4683 Parser.Lex(); 4684 } 4685 4686 //===----------------------------------------------------------------------===// 4687 // swizzle 4688 //===----------------------------------------------------------------------===// 4689 4690 LLVM_READNONE 4691 static unsigned 4692 encodeBitmaskPerm(const unsigned AndMask, 4693 const unsigned OrMask, 4694 const unsigned XorMask) { 4695 using namespace llvm::AMDGPU::Swizzle; 4696 4697 return BITMASK_PERM_ENC | 4698 (AndMask << BITMASK_AND_SHIFT) | 4699 (OrMask << BITMASK_OR_SHIFT) | 4700 (XorMask << BITMASK_XOR_SHIFT); 4701 } 4702 4703 bool 4704 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4705 const unsigned MinVal, 4706 
const unsigned MaxVal, 4707 const StringRef ErrMsg) { 4708 for (unsigned i = 0; i < OpNum; ++i) { 4709 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4710 return false; 4711 } 4712 SMLoc ExprLoc = Parser.getTok().getLoc(); 4713 if (!parseExpr(Op[i])) { 4714 return false; 4715 } 4716 if (Op[i] < MinVal || Op[i] > MaxVal) { 4717 Error(ExprLoc, ErrMsg); 4718 return false; 4719 } 4720 } 4721 4722 return true; 4723 } 4724 4725 bool 4726 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4727 using namespace llvm::AMDGPU::Swizzle; 4728 4729 int64_t Lane[LANE_NUM]; 4730 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4731 "expected a 2-bit lane id")) { 4732 Imm = QUAD_PERM_ENC; 4733 for (unsigned I = 0; I < LANE_NUM; ++I) { 4734 Imm |= Lane[I] << (LANE_SHIFT * I); 4735 } 4736 return true; 4737 } 4738 return false; 4739 } 4740 4741 bool 4742 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4743 using namespace llvm::AMDGPU::Swizzle; 4744 4745 SMLoc S = Parser.getTok().getLoc(); 4746 int64_t GroupSize; 4747 int64_t LaneIdx; 4748 4749 if (!parseSwizzleOperands(1, &GroupSize, 4750 2, 32, 4751 "group size must be in the interval [2,32]")) { 4752 return false; 4753 } 4754 if (!isPowerOf2_64(GroupSize)) { 4755 Error(S, "group size must be a power of two"); 4756 return false; 4757 } 4758 if (parseSwizzleOperands(1, &LaneIdx, 4759 0, GroupSize - 1, 4760 "lane id must be in the interval [0,group size - 1]")) { 4761 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4762 return true; 4763 } 4764 return false; 4765 } 4766 4767 bool 4768 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4769 using namespace llvm::AMDGPU::Swizzle; 4770 4771 SMLoc S = Parser.getTok().getLoc(); 4772 int64_t GroupSize; 4773 4774 if (!parseSwizzleOperands(1, &GroupSize, 4775 2, 32, "group size must be in the interval [2,32]")) { 4776 return false; 4777 } 4778 if (!isPowerOf2_64(GroupSize)) { 4779 Error(S, "group size must be a power of two"); 4780 return false; 4781 } 4782 4783 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4784 return true; 4785 } 4786 4787 bool 4788 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4789 using namespace llvm::AMDGPU::Swizzle; 4790 4791 SMLoc S = Parser.getTok().getLoc(); 4792 int64_t GroupSize; 4793 4794 if (!parseSwizzleOperands(1, &GroupSize, 4795 1, 16, "group size must be in the interval [1,16]")) { 4796 return false; 4797 } 4798 if (!isPowerOf2_64(GroupSize)) { 4799 Error(S, "group size must be a power of two"); 4800 return false; 4801 } 4802 4803 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4804 return true; 4805 } 4806 4807 bool 4808 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4809 using namespace llvm::AMDGPU::Swizzle; 4810 4811 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4812 return false; 4813 } 4814 4815 StringRef Ctl; 4816 SMLoc StrLoc = Parser.getTok().getLoc(); 4817 if (!parseString(Ctl)) { 4818 return false; 4819 } 4820 if (Ctl.size() != BITMASK_WIDTH) { 4821 Error(StrLoc, "expected a 5-character mask"); 4822 return false; 4823 } 4824 4825 unsigned AndMask = 0; 4826 unsigned OrMask = 0; 4827 unsigned XorMask = 0; 4828 4829 for (size_t i = 0; i < Ctl.size(); ++i) { 4830 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4831 switch(Ctl[i]) { 4832 default: 4833 Error(StrLoc, "invalid mask"); 4834 return false; 4835 case '0': 4836 break; 4837 case '1': 4838 OrMask |= Mask; 4839 break; 4840 case 'p': 4841 AndMask |= Mask; 4842 break; 4843 case 'i': 4844 AndMask |= Mask; 4845 XorMask |= Mask; 4846 break; 4847 } 
4848 } 4849 4850 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4851 return true; 4852 } 4853 4854 bool 4855 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4856 4857 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4858 4859 if (!parseExpr(Imm)) { 4860 return false; 4861 } 4862 if (!isUInt<16>(Imm)) { 4863 Error(OffsetLoc, "expected a 16-bit offset"); 4864 return false; 4865 } 4866 return true; 4867 } 4868 4869 bool 4870 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4871 using namespace llvm::AMDGPU::Swizzle; 4872 4873 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 4874 4875 SMLoc ModeLoc = Parser.getTok().getLoc(); 4876 bool Ok = false; 4877 4878 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4879 Ok = parseSwizzleQuadPerm(Imm); 4880 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4881 Ok = parseSwizzleBitmaskPerm(Imm); 4882 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4883 Ok = parseSwizzleBroadcast(Imm); 4884 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4885 Ok = parseSwizzleSwap(Imm); 4886 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 4887 Ok = parseSwizzleReverse(Imm); 4888 } else { 4889 Error(ModeLoc, "expected a swizzle mode"); 4890 } 4891 4892 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4893 } 4894 4895 return false; 4896 } 4897 4898 OperandMatchResultTy 4899 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4900 SMLoc S = Parser.getTok().getLoc(); 4901 int64_t Imm = 0; 4902 4903 if (trySkipId("offset")) { 4904 4905 bool Ok = false; 4906 if (skipToken(AsmToken::Colon, "expected a colon")) { 4907 if (trySkipId("swizzle")) { 4908 Ok = parseSwizzleMacro(Imm); 4909 } else { 4910 Ok = parseSwizzleOffset(Imm); 4911 } 4912 } 4913 4914 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4915 4916 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4917 } else { 4918 // Swizzle "offset" operand is optional. 4919 // If it is omitted, try parsing other optional operands. 4920 return parseOptionalOpr(Operands); 4921 } 4922 } 4923 4924 bool 4925 AMDGPUOperand::isSwizzle() const { 4926 return isImmTy(ImmTySwizzle); 4927 } 4928 4929 //===----------------------------------------------------------------------===// 4930 // VGPR Index Mode 4931 //===----------------------------------------------------------------------===// 4932 4933 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 4934 4935 using namespace llvm::AMDGPU::VGPRIndexMode; 4936 4937 if (trySkipToken(AsmToken::RParen)) { 4938 return OFF; 4939 } 4940 4941 int64_t Imm = 0; 4942 4943 while (true) { 4944 unsigned Mode = 0; 4945 SMLoc S = Parser.getTok().getLoc(); 4946 4947 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 4948 if (trySkipId(IdSymbolic[ModeId])) { 4949 Mode = 1 << ModeId; 4950 break; 4951 } 4952 } 4953 4954 if (Mode == 0) { 4955 Error(S, (Imm == 0)?
4956 "expected a VGPR index mode or a closing parenthesis" : 4957 "expected a VGPR index mode"); 4958 break; 4959 } 4960 4961 if (Imm & Mode) { 4962 Error(S, "duplicate VGPR index mode"); 4963 break; 4964 } 4965 Imm |= Mode; 4966 4967 if (trySkipToken(AsmToken::RParen)) 4968 break; 4969 if (!skipToken(AsmToken::Comma, 4970 "expected a comma or a closing parenthesis")) 4971 break; 4972 } 4973 4974 return Imm; 4975 } 4976 4977 OperandMatchResultTy 4978 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 4979 4980 int64_t Imm = 0; 4981 SMLoc S = Parser.getTok().getLoc(); 4982 4983 if (getLexer().getKind() == AsmToken::Identifier && 4984 Parser.getTok().getString() == "gpr_idx" && 4985 getLexer().peekTok().is(AsmToken::LParen)) { 4986 4987 Parser.Lex(); 4988 Parser.Lex(); 4989 4990 // If parse failed, trigger an error but do not return error code 4991 // to avoid excessive error messages. 4992 Imm = parseGPRIdxMacro(); 4993 4994 } else { 4995 if (getParser().parseAbsoluteExpression(Imm)) 4996 return MatchOperand_NoMatch; 4997 if (Imm < 0 || !isUInt<4>(Imm)) { 4998 Error(S, "invalid immediate: only 4-bit values are legal"); 4999 } 5000 } 5001 5002 Operands.push_back( 5003 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5004 return MatchOperand_Success; 5005 } 5006 5007 bool AMDGPUOperand::isGPRIdxMode() const { 5008 return isImmTy(ImmTyGprIdxMode); 5009 } 5010 5011 //===----------------------------------------------------------------------===// 5012 // sopp branch targets 5013 //===----------------------------------------------------------------------===// 5014 5015 OperandMatchResultTy 5016 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5017 SMLoc S = Parser.getTok().getLoc(); 5018 5019 switch (getLexer().getKind()) { 5020 default: return MatchOperand_ParseFail; 5021 case AsmToken::Integer: { 5022 int64_t Imm; 5023 if (getParser().parseAbsoluteExpression(Imm)) 5024 return MatchOperand_ParseFail; 5025 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5026 return MatchOperand_Success; 5027 } 5028 5029 case AsmToken::Identifier: 5030 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5031 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5032 Parser.getTok().getString()), getContext()), S)); 5033 Parser.Lex(); 5034 return MatchOperand_Success; 5035 } 5036 } 5037 5038 //===----------------------------------------------------------------------===// 5039 // mubuf 5040 //===----------------------------------------------------------------------===// 5041 5042 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5043 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5044 } 5045 5046 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5047 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5048 } 5049 5050 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5051 const OperandVector &Operands, 5052 bool IsAtomic, 5053 bool IsAtomicReturn, 5054 bool IsLds) { 5055 bool IsLdsOpcode = IsLds; 5056 bool HasLdsModifier = false; 5057 OptionalImmIndexMap OptionalIdx; 5058 assert(IsAtomicReturn ? IsAtomic : true); 5059 unsigned FirstOperandIdx = 1; 5060 5061 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5062 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5063 5064 // Add the register arguments 5065 if (Op.isReg()) { 5066 Op.addRegOperands(Inst, 1); 5067 // Insert a tied src for atomic return dst. 
5068 // This cannot be postponed as subsequent calls to 5069 // addImmOperands rely on correct number of MC operands. 5070 if (IsAtomicReturn && i == FirstOperandIdx) 5071 Op.addRegOperands(Inst, 1); 5072 continue; 5073 } 5074 5075 // Handle the case where soffset is an immediate 5076 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5077 Op.addImmOperands(Inst, 1); 5078 continue; 5079 } 5080 5081 HasLdsModifier = Op.isLDS(); 5082 5083 // Handle tokens like 'offen' which are sometimes hard-coded into the 5084 // asm string. There are no MCInst operands for these. 5085 if (Op.isToken()) { 5086 continue; 5087 } 5088 assert(Op.isImm()); 5089 5090 // Handle optional arguments 5091 OptionalIdx[Op.getImmTy()] = i; 5092 } 5093 5094 // This is a workaround for an llvm quirk which may result in an 5095 // incorrect instruction selection. Lds and non-lds versions of 5096 // MUBUF instructions are identical except that lds versions 5097 // have mandatory 'lds' modifier. However this modifier follows 5098 // optional modifiers and llvm asm matcher regards this 'lds' 5099 // modifier as an optional one. As a result, an lds version 5100 // of opcode may be selected even if it has no 'lds' modifier. 5101 if (IsLdsOpcode && !HasLdsModifier) { 5102 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5103 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5104 Inst.setOpcode(NoLdsOpcode); 5105 IsLdsOpcode = false; 5106 } 5107 } 5108 5109 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5110 if (!IsAtomic) { // glc is hard-coded. 5111 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5112 } 5113 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5114 5115 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5116 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5117 } 5118 } 5119 5120 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5121 OptionalImmIndexMap OptionalIdx; 5122 5123 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5124 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5125 5126 // Add the register arguments 5127 if (Op.isReg()) { 5128 Op.addRegOperands(Inst, 1); 5129 continue; 5130 } 5131 5132 // Handle the case where soffset is an immediate 5133 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5134 Op.addImmOperands(Inst, 1); 5135 continue; 5136 } 5137 5138 // Handle tokens like 'offen' which are sometimes hard-coded into the 5139 // asm string. There are no MCInst operands for these. 
5140 if (Op.isToken()) { 5141 continue; 5142 } 5143 assert(Op.isImm()); 5144 5145 // Handle optional arguments 5146 OptionalIdx[Op.getImmTy()] = i; 5147 } 5148 5149 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5150 AMDGPUOperand::ImmTyOffset); 5151 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5152 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5153 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5154 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5155 } 5156 5157 //===----------------------------------------------------------------------===// 5158 // mimg 5159 //===----------------------------------------------------------------------===// 5160 5161 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5162 bool IsAtomic) { 5163 unsigned I = 1; 5164 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5165 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5166 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5167 } 5168 5169 if (IsAtomic) { 5170 // Add src, same as dst 5171 assert(Desc.getNumDefs() == 1); 5172 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5173 } 5174 5175 OptionalImmIndexMap OptionalIdx; 5176 5177 for (unsigned E = Operands.size(); I != E; ++I) { 5178 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5179 5180 // Add the register arguments 5181 if (Op.isReg()) { 5182 Op.addRegOperands(Inst, 1); 5183 } else if (Op.isImmModifier()) { 5184 OptionalIdx[Op.getImmTy()] = I; 5185 } else { 5186 llvm_unreachable("unexpected operand type"); 5187 } 5188 } 5189 5190 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5191 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5192 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5193 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5194 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5195 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5196 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5197 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5199 } 5200 5201 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5202 cvtMIMG(Inst, Operands, true); 5203 } 5204 5205 //===----------------------------------------------------------------------===// 5206 // smrd 5207 //===----------------------------------------------------------------------===// 5208 5209 bool AMDGPUOperand::isSMRDOffset8() const { 5210 return isImm() && isUInt<8>(getImm()); 5211 } 5212 5213 bool AMDGPUOperand::isSMRDOffset20() const { 5214 return isImm() && isUInt<20>(getImm()); 5215 } 5216 5217 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5218 // 32-bit literals are only supported on CI and we only want to use them 5219 // when the offset is > 8-bits. 
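// e.g. an offset of 0x100 (illustrative) no longer fits in 8 bits, so on CI it
// must be emitted as a 32-bit literal.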
5220 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5221 } 5222 5223 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5224 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5225 } 5226 5227 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5228 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5229 } 5230 5231 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5232 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5233 } 5234 5235 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5236 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5237 } 5238 5239 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5240 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5241 } 5242 5243 //===----------------------------------------------------------------------===// 5244 // vop3 5245 //===----------------------------------------------------------------------===// 5246 5247 static bool ConvertOmodMul(int64_t &Mul) { 5248 if (Mul != 1 && Mul != 2 && Mul != 4) 5249 return false; 5250 5251 Mul >>= 1; 5252 return true; 5253 } 5254 5255 static bool ConvertOmodDiv(int64_t &Div) { 5256 if (Div == 1) { 5257 Div = 0; 5258 return true; 5259 } 5260 5261 if (Div == 2) { 5262 Div = 3; 5263 return true; 5264 } 5265 5266 return false; 5267 } 5268 5269 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5270 if (BoundCtrl == 0) { 5271 BoundCtrl = 1; 5272 return true; 5273 } 5274 5275 if (BoundCtrl == -1) { 5276 BoundCtrl = 0; 5277 return true; 5278 } 5279 5280 return false; 5281 } 5282 5283 // Note: the order in this table matches the order of operands in AsmString. 
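// Each entry below is {asm name, immediate type, is-a-bit flag, optional value
// converter}. Bit entries (e.g. "glc") are written bare or negated as "noglc";
// the remaining entries use the "name:value" form parsed by parseIntWithPrefix
// and related helpers.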
5284 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5285 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5286 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5287 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5288 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5289 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5290 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5291 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5292 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5293 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5294 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5295 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5296 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5297 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5298 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5299 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5300 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5301 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5302 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5303 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5304 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5305 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5306 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5307 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5308 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5309 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5310 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5311 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5312 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5313 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5314 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5315 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5316 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5317 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5318 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5319 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5320 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5321 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5322 }; 5323 5324 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5325 unsigned size = Operands.size(); 5326 assert(size > 0); 5327 5328 OperandMatchResultTy res = parseOptionalOpr(Operands); 5329 5330 // This is a hack to enable hardcoded mandatory operands which follow 5331 // optional operands. 5332 // 5333 // Current design assumes that all operands after the first optional operand 5334 // are also optional. However implementation of some instructions violates 5335 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5336 // 5337 // To alleviate this problem, we have to (implicitly) parse extra operands 5338 // to make sure autogenerated parser of custom operands never hit hardcoded 5339 // mandatory operands. 5340 5341 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5342 5343 // We have parsed the first optional operand. 5344 // Parse as many operands as necessary to skip all mandatory operands. 
5345 5346 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5347 if (res != MatchOperand_Success || 5348 getLexer().is(AsmToken::EndOfStatement)) break; 5349 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5350 res = parseOptionalOpr(Operands); 5351 } 5352 } 5353 5354 return res; 5355 } 5356 5357 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5358 OperandMatchResultTy res; 5359 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5360 // try to parse any optional operand here 5361 if (Op.IsBit) { 5362 res = parseNamedBit(Op.Name, Operands, Op.Type); 5363 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5364 res = parseOModOperand(Operands); 5365 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5366 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5367 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5368 res = parseSDWASel(Operands, Op.Name, Op.Type); 5369 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5370 res = parseSDWADstUnused(Operands); 5371 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5372 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5373 Op.Type == AMDGPUOperand::ImmTyNegLo || 5374 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5375 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5376 Op.ConvertResult); 5377 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5378 res = parseDfmtNfmt(Operands); 5379 } else { 5380 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5381 } 5382 if (res != MatchOperand_NoMatch) { 5383 return res; 5384 } 5385 } 5386 return MatchOperand_NoMatch; 5387 } 5388 5389 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5390 StringRef Name = Parser.getTok().getString(); 5391 if (Name == "mul") { 5392 return parseIntWithPrefix("mul", Operands, 5393 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5394 } 5395 5396 if (Name == "div") { 5397 return parseIntWithPrefix("div", Operands, 5398 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5399 } 5400 5401 return MatchOperand_NoMatch; 5402 } 5403 5404 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5405 cvtVOP3P(Inst, Operands); 5406 5407 int Opc = Inst.getOpcode(); 5408 5409 int SrcNum; 5410 const int Ops[] = { AMDGPU::OpName::src0, 5411 AMDGPU::OpName::src1, 5412 AMDGPU::OpName::src2 }; 5413 for (SrcNum = 0; 5414 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5415 ++SrcNum); 5416 assert(SrcNum > 0); 5417 5418 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5419 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5420 5421 if ((OpSel & (1 << SrcNum)) != 0) { 5422 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5423 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5424 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5425 } 5426 } 5427 5428 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5429 // 1. This operand is input modifiers 5430 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5431 // 2. This is not last operand 5432 && Desc.NumOperands > (OpNum + 1) 5433 // 3. Next operand is register class 5434 && Desc.OpInfo[OpNum + 1].RegClass != -1 5435 // 4. 
static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
  // 1. This operand is an input modifiers operand,
  return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
      // 2. and it is not the last operand,
      && Desc.NumOperands > (OpNum + 1)
      // 3. and the next operand is a register-class operand,
      && Desc.OpInfo[OpNum + 1].RegClass != -1
      // 4. and the next register is not tied to any other operand.
      && Desc.getOperandConstraint(OpNum + 1,
                                   MCOI::OperandConstraint::TIED_TO) == -1;
}

void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers.
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers.
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case for v_mac_{f16, f32} and v_fmac_f32 (gfx906):
  // they have a src2 register operand that is tied to the dst operand.
  // We don't allow modifiers for this operand in the assembler, so
  // src2_modifiers should be 0.
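  //
  // Sketch: for an input like "v_mac_f32_e64 v0, v1, v2", only dst/src0/src1
  // come from the parsed operand list; the two inserts below materialize
  // src2_modifiers = 0 and reuse the dst register (here v0) as src2.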
  if (Opc == AMDGPU::V_MAC_F32_e64_si ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  cvtVOP3(Inst, Operands, OptionalIdx);
}
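
// VOP3P conversion: op_sel, op_sel_hi, neg_lo and neg_hi are parsed as
// stand-alone immediate operands; after the generic VOP3 conversion they are
// decomposed bit by bit (bit J belongs to srcJ) and folded into the
// corresponding srcJ_modifiers fields.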
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst,
                               const OperandVector &Operands) {
  OptionalImmIndexMap OptIdx;
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  cvtVOP3(Inst, Operands, OptIdx);

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers.

  addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    assert(IsPacked);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);

  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelHiIdx != -1) {
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();
  }

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}

//===----------------------------------------------------------------------===//
// dpp
//===----------------------------------------------------------------------===//

bool AMDGPUOperand::isDPPCtrl() const {
  using namespace AMDGPU::DPP;

  bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
  if (result) {
    int64_t Imm = getImm();
    return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
           (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
           (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
           (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
           (Imm == DppCtrl::WAVE_SHL1) ||
           (Imm == DppCtrl::WAVE_ROL1) ||
           (Imm == DppCtrl::WAVE_SHR1) ||
           (Imm == DppCtrl::WAVE_ROR1) ||
           (Imm == DppCtrl::ROW_MIRROR) ||
           (Imm == DppCtrl::ROW_HALF_MIRROR) ||
           (Imm == DppCtrl::BCAST15) ||
           (Imm == DppCtrl::BCAST31);
  }
  return false;
}

bool AMDGPUOperand::isS16Imm() const {
  return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
}

bool AMDGPUOperand::isU16Imm() const {
  return isImm() && isUInt<16>(getImm());
}
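
// DPP control syntax accepted below (illustrative examples only):
//   quad_perm:[0,1,2,3]   row_shl:1 .. row_shl:15   row_ror:3
//   wave_shl:1  row_mirror  row_half_mirror  row_bcast:15  row_bcast:31
// quad_perm lane selects are packed two bits per lane:
//   value = sel0 | (sel1 << 2) | (sel2 << 4) | (sel3 << 6).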
OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
  using namespace AMDGPU::DPP;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Prefix;
  int64_t Int;

  if (getLexer().getKind() == AsmToken::Identifier) {
    Prefix = Parser.getTok().getString();
  } else {
    return MatchOperand_NoMatch;
  }

  if (Prefix == "row_mirror") {
    Int = DppCtrl::ROW_MIRROR;
    Parser.Lex();
  } else if (Prefix == "row_half_mirror") {
    Int = DppCtrl::ROW_HALF_MIRROR;
    Parser.Lex();
  } else {
    // Check to prevent parseDPPCtrl from eating invalid tokens.
    if (Prefix != "quad_perm"
        && Prefix != "row_shl"
        && Prefix != "row_shr"
        && Prefix != "row_ror"
        && Prefix != "wave_shl"
        && Prefix != "wave_rol"
        && Prefix != "wave_shr"
        && Prefix != "wave_ror"
        && Prefix != "row_bcast") {
      return MatchOperand_NoMatch;
    }

    Parser.Lex();
    if (getLexer().isNot(AsmToken::Colon))
      return MatchOperand_ParseFail;

    if (Prefix == "quad_perm") {
      // quad_perm:[%d,%d,%d,%d]
      Parser.Lex();
      if (getLexer().isNot(AsmToken::LBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();

      if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
        return MatchOperand_ParseFail;

      for (int i = 0; i < 3; ++i) {
        if (getLexer().isNot(AsmToken::Comma))
          return MatchOperand_ParseFail;
        Parser.Lex();

        int64_t Temp;
        if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
          return MatchOperand_ParseFail;
        const int shift = i * 2 + 2;
        Int += (Temp << shift);
      }

      if (getLexer().isNot(AsmToken::RBrac))
        return MatchOperand_ParseFail;
      Parser.Lex();
    } else {
      // sel:%d
      Parser.Lex();
      if (getParser().parseAbsoluteExpression(Int))
        return MatchOperand_ParseFail;

      if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHL0;
      } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_SHR0;
      } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
        Int |= DppCtrl::ROW_ROR0;
      } else if (Prefix == "wave_shl" && 1 == Int) {
        Int = DppCtrl::WAVE_SHL1;
      } else if (Prefix == "wave_rol" && 1 == Int) {
        Int = DppCtrl::WAVE_ROL1;
      } else if (Prefix == "wave_shr" && 1 == Int) {
        Int = DppCtrl::WAVE_SHR1;
      } else if (Prefix == "wave_ror" && 1 == Int) {
        Int = DppCtrl::WAVE_ROR1;
      } else if (Prefix == "row_bcast") {
        if (Int == 15) {
          Int = DppCtrl::BCAST15;
        } else if (Int == 31) {
          Int = DppCtrl::BCAST31;
        } else {
          return MatchOperand_ParseFail;
        }
      } else {
        return MatchOperand_ParseFail;
      }
    }
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
  return MatchOperand_Success;
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}

AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
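
// cvtDPP: dpp_ctrl is added as a plain immediate; when omitted, row_mask and
// bank_mask default to 0xf (all rows / banks enabled) and bound_ctrl defaults
// to 0, matching the default*() helpers above.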
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // Handle the tied old or src2 operand for MAC instructions.
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments.
    if (Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp uses the "vcc" token.
      // Skip it.
      continue;
    } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegWithFPInputModsOperands(Inst, 2);
    } else if (Op.isDPPCtrl()) {
      Op.addImmOperands(Inst, 1);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
}

//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//

OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
                              AMDGPUOperand::ImmTy Type) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix(Prefix, Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("BYTE_0", SdwaSel::BYTE_0)
        .Case("BYTE_1", SdwaSel::BYTE_1)
        .Case("BYTE_2", SdwaSel::BYTE_2)
        .Case("BYTE_3", SdwaSel::BYTE_3)
        .Case("WORD_0", SdwaSel::WORD_0)
        .Case("WORD_1", SdwaSel::WORD_1)
        .Case("DWORD", SdwaSel::DWORD)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
  return MatchOperand_Success;
}

OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
  using namespace llvm::AMDGPU::SDWA;

  SMLoc S = Parser.getTok().getLoc();
  StringRef Value;
  OperandMatchResultTy res;

  res = parseStringWithPrefix("dst_unused", Value);
  if (res != MatchOperand_Success) {
    return res;
  }

  int64_t Int;
  Int = StringSwitch<int64_t>(Value)
        .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
        .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
        .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
        .Default(0xffffffff);
  Parser.Lex(); // eat last token

  if (Int == 0xffffffff) {
    return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
  return MatchOperand_Success;
}

void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}

void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}

void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true);
}

void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
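
// cvtSDWA: optional SDWA operands that were not written in the source get
// their defaults below: dst_sel / src0_sel / src1_sel default to DWORD,
// dst_unused to UNUSED_PRESERVE, and clamp/omod to 0.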
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType, bool skipVcc) {
  using namespace llvm::AMDGPU::SDWA;

  OptionalImmIndexMap OptionalIdx;
  bool skippedVcc = false;

  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (skipVcc && !skippedVcc && Op.isReg() && Op.getReg() == AMDGPU::VCC) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa uses the "vcc" token as dst.
      // Skip it if it's the 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or the 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on the previous iteration.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) {
        skippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        skippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    skippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9 has no optional sdwa arguments.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // Special case for v_mac_{f16, f32}:
  // they have a src2 register operand that is tied to the dst operand.
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    auto it = Inst.begin();
    std::advance(
        it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}

/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}

#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#define GET_MNEMONIC_SPELL_CHECKER
#include "AMDGPUGenAsmMatcher.inc"

// This function should be defined after the auto-generated include so that
// the MatchClassKind enum is defined.
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
                                                     unsigned Kind) {
  // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
  // But MatchInstructionImpl() expects to meet a token and fails to validate
  // the operand. This method checks if we were given an immediate operand but
  // expected to get the corresponding token.
  AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
  switch (Kind) {
  case MCK_addr64:
    return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
  case MCK_gds:
    return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
  case MCK_lds:
    return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
  case MCK_glc:
    return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
  case MCK_idxen:
    return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
  case MCK_offen:
    return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcB32:
    // When operands have expression values, they will return true for isToken,
    // because it is not possible to distinguish between a token and an
    // expression at parse time. MatchInstructionImpl() will always try to
    // match an operand as a token, when isToken returns true, and when the
    // name of the expression is not a valid token, the match will fail,
    // so we need to handle it here.
    return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
  case MCK_SSrcF32:
    return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
  case MCK_SoppBrTarget:
    return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
  case MCK_VReg32OrOff:
    return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
  case MCK_InterpSlot:
    return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
  case MCK_Attr:
    return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
  case MCK_AttrChan:
    return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
  default:
    return Match_InvalidOperand;
  }
}

//===----------------------------------------------------------------------===//
// endpgm
//===----------------------------------------------------------------------===//
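// s_endpgm may optionally be followed by a 16-bit immediate (for example,
// "s_endpgm 3" in hand-written assembly); when the immediate is omitted it
// defaults to 0.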

OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
  SMLoc S = Parser.getTok().getLoc();
  int64_t Imm = 0;

  if (!parseExpr(Imm)) {
    // The operand is optional; if not present, default to 0.
    Imm = 0;
  }

  if (!isUInt<16>(Imm)) {
    Error(S, "expected a 16-bit value");
    return MatchOperand_ParseFail;
  }

  Operands.push_back(
      AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
  return MatchOperand_Success;
}

bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }