1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "Utils/AMDGPUAsmUtils.h" 16 #include "Utils/AMDGPUBaseInfo.h" 17 #include "Utils/AMDKernelCodeTUtils.h" 18 #include "llvm/ADT/APFloat.h" 19 #include "llvm/ADT/APInt.h" 20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/ADT/SmallBitVector.h" 23 #include "llvm/ADT/SmallString.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/StringSwitch.h" 26 #include "llvm/ADT/Twine.h" 27 #include "llvm/BinaryFormat/ELF.h" 28 #include "llvm/MC/MCAsmInfo.h" 29 #include "llvm/MC/MCContext.h" 30 #include "llvm/MC/MCExpr.h" 31 #include "llvm/MC/MCInst.h" 32 #include "llvm/MC/MCInstrDesc.h" 33 #include "llvm/MC/MCInstrInfo.h" 34 #include "llvm/MC/MCParser/MCAsmLexer.h" 35 #include "llvm/MC/MCParser/MCAsmParser.h" 36 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 37 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 38 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/MC/MCStreamer.h" 41 #include "llvm/MC/MCSubtargetInfo.h" 42 #include "llvm/MC/MCSymbol.h" 43 #include "llvm/Support/AMDGPUMetadata.h" 44 #include "llvm/Support/AMDHSAKernelDescriptor.h" 45 #include "llvm/Support/Casting.h" 46 #include "llvm/Support/Compiler.h" 47 #include "llvm/Support/ErrorHandling.h" 48 #include "llvm/Support/MachineValueType.h" 49 #include "llvm/Support/MathExtras.h" 50 #include "llvm/Support/SMLoc.h" 51 #include "llvm/Support/TargetParser.h" 52 #include "llvm/Support/TargetRegistry.h" 53 #include "llvm/Support/raw_ostream.h" 54 #include <algorithm> 55 #include <cassert> 56 #include <cstdint> 57 #include <cstring> 58 #include <iterator> 59 #include <map> 60 #include <memory> 61 #include <string> 62 63 using namespace llvm; 64 using namespace llvm::AMDGPU; 65 using namespace llvm::amdhsa; 66 67 namespace { 68 69 class AMDGPUAsmParser; 70 71 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL }; 72 73 //===----------------------------------------------------------------------===// 74 // Operand 75 //===----------------------------------------------------------------------===// 76 77 class AMDGPUOperand : public MCParsedAsmOperand { 78 enum KindTy { 79 Token, 80 Immediate, 81 Register, 82 Expression 83 } Kind; 84 85 SMLoc StartLoc, EndLoc; 86 const AMDGPUAsmParser *AsmParser; 87 88 public: 89 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 90 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 91 92 using Ptr = std::unique_ptr<AMDGPUOperand>; 93 94 struct Modifiers { 95 bool Abs = false; 96 bool Neg = false; 97 bool Sext = false; 98 99 bool hasFPModifiers() const { return Abs || Neg; } 100 bool hasIntModifiers() const { return Sext; } 101 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 102 103 int64_t getFPModifiersOperand() const { 104 int64_t Operand = 0; 105 Operand |= Abs ? SISrcMods::ABS : 0u; 106 Operand |= Neg ? SISrcMods::NEG : 0u; 107 return Operand; 108 } 109 110 int64_t getIntModifiersOperand() const { 111 int64_t Operand = 0; 112 Operand |= Sext ? SISrcMods::SEXT : 0u; 113 return Operand; 114 } 115 116 int64_t getModifiersOperand() const { 117 assert(!(hasFPModifiers() && hasIntModifiers()) 118 && "fp and int modifiers should not be used simultaneously"); 119 if (hasFPModifiers()) { 120 return getFPModifiersOperand(); 121 } else if (hasIntModifiers()) { 122 return getIntModifiersOperand(); 123 } else { 124 return 0; 125 } 126 } 127 128 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 129 }; 130 131 enum ImmTy { 132 ImmTyNone, 133 ImmTyGDS, 134 ImmTyLDS, 135 ImmTyOffen, 136 ImmTyIdxen, 137 ImmTyAddr64, 138 ImmTyOffset, 139 ImmTyInstOffset, 140 ImmTyOffset0, 141 ImmTyOffset1, 142 ImmTyGLC, 143 ImmTySLC, 144 ImmTyTFE, 145 ImmTyD16, 146 ImmTyClampSI, 147 ImmTyOModSI, 148 ImmTyDppCtrl, 149 ImmTyDppRowMask, 150 ImmTyDppBankMask, 151 ImmTyDppBoundCtrl, 152 ImmTySdwaDstSel, 153 ImmTySdwaSrc0Sel, 154 ImmTySdwaSrc1Sel, 155 ImmTySdwaDstUnused, 156 ImmTyDMask, 157 ImmTyUNorm, 158 ImmTyDA, 159 ImmTyR128A16, 160 ImmTyLWE, 161 ImmTyExpTgt, 162 ImmTyExpCompr, 163 ImmTyExpVM, 164 ImmTyFORMAT, 165 ImmTyHwreg, 166 ImmTyOff, 167 ImmTySendMsg, 168 ImmTyInterpSlot, 169 ImmTyInterpAttr, 170 ImmTyAttrChan, 171 ImmTyOpSel, 172 ImmTyOpSelHi, 173 ImmTyNegLo, 174 ImmTyNegHi, 175 ImmTySwizzle, 176 ImmTyGprIdxMode, 177 ImmTyEndpgm, 178 ImmTyHigh 179 }; 180 181 struct TokOp { 182 const char *Data; 183 unsigned Length; 184 }; 185 186 struct ImmOp { 187 int64_t Val; 188 ImmTy Type; 189 bool IsFPImm; 190 Modifiers Mods; 191 }; 192 193 struct RegOp { 194 unsigned RegNo; 195 bool IsForcedVOP3; 196 Modifiers Mods; 197 }; 198 199 union { 200 TokOp Tok; 201 ImmOp Imm; 202 RegOp Reg; 203 const MCExpr *Expr; 204 }; 205 206 bool isToken() const override { 207 if (Kind == Token) 208 return true; 209 210 if (Kind != Expression || !Expr) 211 return false; 212 213 // When parsing operands, we can't always tell if something was meant to be 214 // a token, like 'gds', or an expression that references a global variable. 215 // In this case, we assume the string is an expression, and if we need to 216 // interpret is a token, then we treat the symbol name as the token. 217 return isa<MCSymbolRefExpr>(Expr); 218 } 219 220 bool isImm() const override { 221 return Kind == Immediate; 222 } 223 224 bool isInlinableImm(MVT type) const; 225 bool isLiteralImm(MVT type) const; 226 227 bool isRegKind() const { 228 return Kind == Register; 229 } 230 231 bool isReg() const override { 232 return isRegKind() && !hasModifiers(); 233 } 234 235 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 236 return isRegClass(RCID) || isInlinableImm(type); 237 } 238 239 bool isRegOrImmWithInt16InputMods() const { 240 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 241 } 242 243 bool isRegOrImmWithInt32InputMods() const { 244 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 245 } 246 247 bool isRegOrImmWithInt64InputMods() const { 248 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 249 } 250 251 bool isRegOrImmWithFP16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 253 } 254 255 bool isRegOrImmWithFP32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 257 } 258 259 bool isRegOrImmWithFP64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 261 } 262 263 bool isVReg() const { 264 return isRegClass(AMDGPU::VGPR_32RegClassID) || 265 isRegClass(AMDGPU::VReg_64RegClassID) || 266 isRegClass(AMDGPU::VReg_96RegClassID) || 267 isRegClass(AMDGPU::VReg_128RegClassID) || 268 isRegClass(AMDGPU::VReg_256RegClassID) || 269 isRegClass(AMDGPU::VReg_512RegClassID); 270 } 271 272 bool isVReg32() const { 273 return isRegClass(AMDGPU::VGPR_32RegClassID); 274 } 275 276 bool isVReg32OrOff() const { 277 return isOff() || isVReg32(); 278 } 279 280 bool isSDWAOperand(MVT type) const; 281 bool isSDWAFP16Operand() const; 282 bool isSDWAFP32Operand() const; 283 bool isSDWAInt16Operand() const; 284 bool isSDWAInt32Operand() const; 285 286 bool isImmTy(ImmTy ImmT) const { 287 return isImm() && Imm.Type == ImmT; 288 } 289 290 bool isImmModifier() const { 291 return isImm() && Imm.Type != ImmTyNone; 292 } 293 294 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 295 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 296 bool isDMask() const { return isImmTy(ImmTyDMask); } 297 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 298 bool isDA() const { return isImmTy(ImmTyDA); } 299 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 300 bool isLWE() const { return isImmTy(ImmTyLWE); } 301 bool isOff() const { return isImmTy(ImmTyOff); } 302 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 303 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 304 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 305 bool isOffen() const { return isImmTy(ImmTyOffen); } 306 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 307 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 308 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 309 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 310 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 311 312 bool isOffsetU12() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isUInt<12>(getImm()); } 313 bool isOffsetS13() const { return (isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset)) && isInt<13>(getImm()); } 314 bool isGDS() const { return isImmTy(ImmTyGDS); } 315 bool isLDS() const { return isImmTy(ImmTyLDS); } 316 bool isGLC() const { return isImmTy(ImmTyGLC); } 317 bool isSLC() const { return isImmTy(ImmTySLC); } 318 bool isTFE() const { return isImmTy(ImmTyTFE); } 319 bool isD16() const { return isImmTy(ImmTyD16); } 320 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 321 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 322 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 323 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 324 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 325 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 326 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 327 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 328 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 329 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 330 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 331 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 332 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 333 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 334 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 335 bool isHigh() const { return isImmTy(ImmTyHigh); } 336 337 bool isMod() const { 338 return isClampSI() || isOModSI(); 339 } 340 341 bool isRegOrImm() const { 342 return isReg() || isImm(); 343 } 344 345 bool isRegClass(unsigned RCID) const; 346 347 bool isInlineValue() const; 348 349 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 350 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 351 } 352 353 bool isSCSrcB16() const { 354 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 355 } 356 357 bool isSCSrcV2B16() const { 358 return isSCSrcB16(); 359 } 360 361 bool isSCSrcB32() const { 362 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 363 } 364 365 bool isSCSrcB64() const { 366 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 367 } 368 369 bool isSCSrcF16() const { 370 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 371 } 372 373 bool isSCSrcV2F16() const { 374 return isSCSrcF16(); 375 } 376 377 bool isSCSrcF32() const { 378 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 379 } 380 381 bool isSCSrcF64() const { 382 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 383 } 384 385 bool isSSrcB32() const { 386 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 387 } 388 389 bool isSSrcB16() const { 390 return isSCSrcB16() || isLiteralImm(MVT::i16); 391 } 392 393 bool isSSrcV2B16() const { 394 llvm_unreachable("cannot happen"); 395 return isSSrcB16(); 396 } 397 398 bool isSSrcB64() const { 399 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 400 // See isVSrc64(). 401 return isSCSrcB64() || isLiteralImm(MVT::i64); 402 } 403 404 bool isSSrcF32() const { 405 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 406 } 407 408 bool isSSrcF64() const { 409 return isSCSrcB64() || isLiteralImm(MVT::f64); 410 } 411 412 bool isSSrcF16() const { 413 return isSCSrcB16() || isLiteralImm(MVT::f16); 414 } 415 416 bool isSSrcV2F16() const { 417 llvm_unreachable("cannot happen"); 418 return isSSrcF16(); 419 } 420 421 bool isSSrcOrLdsB32() const { 422 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 423 isLiteralImm(MVT::i32) || isExpr(); 424 } 425 426 bool isVCSrcB32() const { 427 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 428 } 429 430 bool isVCSrcB64() const { 431 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 432 } 433 434 bool isVCSrcB16() const { 435 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 436 } 437 438 bool isVCSrcV2B16() const { 439 return isVCSrcB16(); 440 } 441 442 bool isVCSrcF32() const { 443 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 444 } 445 446 bool isVCSrcF64() const { 447 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 448 } 449 450 bool isVCSrcF16() const { 451 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 452 } 453 454 bool isVCSrcV2F16() const { 455 return isVCSrcF16(); 456 } 457 458 bool isVSrcB32() const { 459 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 460 } 461 462 bool isVSrcB64() const { 463 return isVCSrcF64() || isLiteralImm(MVT::i64); 464 } 465 466 bool isVSrcB16() const { 467 return isVCSrcF16() || isLiteralImm(MVT::i16); 468 } 469 470 bool isVSrcV2B16() const { 471 llvm_unreachable("cannot happen"); 472 return isVSrcB16(); 473 } 474 475 bool isVSrcF32() const { 476 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 477 } 478 479 bool isVSrcF64() const { 480 return isVCSrcF64() || isLiteralImm(MVT::f64); 481 } 482 483 bool isVSrcF16() const { 484 return isVCSrcF16() || isLiteralImm(MVT::f16); 485 } 486 487 bool isVSrcV2F16() const { 488 llvm_unreachable("cannot happen"); 489 return isVSrcF16(); 490 } 491 492 bool isKImmFP32() const { 493 return isLiteralImm(MVT::f32); 494 } 495 496 bool isKImmFP16() const { 497 return isLiteralImm(MVT::f16); 498 } 499 500 bool isMem() const override { 501 return false; 502 } 503 504 bool isExpr() const { 505 return Kind == Expression; 506 } 507 508 bool isSoppBrTarget() const { 509 return isExpr() || isImm(); 510 } 511 512 bool isSWaitCnt() const; 513 bool isHwreg() const; 514 bool isSendMsg() const; 515 bool isSwizzle() const; 516 bool isSMRDOffset8() const; 517 bool isSMRDOffset20() const; 518 bool isSMRDLiteralOffset() const; 519 bool isDPPCtrl() const; 520 bool isGPRIdxMode() const; 521 bool isS16Imm() const; 522 bool isU16Imm() const; 523 bool isEndpgm() const; 524 525 StringRef getExpressionAsToken() const { 526 assert(isExpr()); 527 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 528 return S->getSymbol().getName(); 529 } 530 531 StringRef getToken() const { 532 assert(isToken()); 533 534 if (Kind == Expression) 535 return getExpressionAsToken(); 536 537 return StringRef(Tok.Data, Tok.Length); 538 } 539 540 int64_t getImm() const { 541 assert(isImm()); 542 return Imm.Val; 543 } 544 545 ImmTy getImmTy() const { 546 assert(isImm()); 547 return Imm.Type; 548 } 549 550 unsigned getReg() const override { 551 return Reg.RegNo; 552 } 553 554 SMLoc getStartLoc() const override { 555 return StartLoc; 556 } 557 558 SMLoc getEndLoc() const override { 559 return EndLoc; 560 } 561 562 SMRange getLocRange() const { 563 return SMRange(StartLoc, EndLoc); 564 } 565 566 Modifiers getModifiers() const { 567 assert(isRegKind() || isImmTy(ImmTyNone)); 568 return isRegKind() ? Reg.Mods : Imm.Mods; 569 } 570 571 void setModifiers(Modifiers Mods) { 572 assert(isRegKind() || isImmTy(ImmTyNone)); 573 if (isRegKind()) 574 Reg.Mods = Mods; 575 else 576 Imm.Mods = Mods; 577 } 578 579 bool hasModifiers() const { 580 return getModifiers().hasModifiers(); 581 } 582 583 bool hasFPModifiers() const { 584 return getModifiers().hasFPModifiers(); 585 } 586 587 bool hasIntModifiers() const { 588 return getModifiers().hasIntModifiers(); 589 } 590 591 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 592 593 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 594 595 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 596 597 template <unsigned Bitwidth> 598 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 599 600 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 601 addKImmFPOperands<16>(Inst, N); 602 } 603 604 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 605 addKImmFPOperands<32>(Inst, N); 606 } 607 608 void addRegOperands(MCInst &Inst, unsigned N) const; 609 610 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 611 if (isRegKind()) 612 addRegOperands(Inst, N); 613 else if (isExpr()) 614 Inst.addOperand(MCOperand::createExpr(Expr)); 615 else 616 addImmOperands(Inst, N); 617 } 618 619 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 620 Modifiers Mods = getModifiers(); 621 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 622 if (isRegKind()) { 623 addRegOperands(Inst, N); 624 } else { 625 addImmOperands(Inst, N, false); 626 } 627 } 628 629 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 630 assert(!hasIntModifiers()); 631 addRegOrImmWithInputModsOperands(Inst, N); 632 } 633 634 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 635 assert(!hasFPModifiers()); 636 addRegOrImmWithInputModsOperands(Inst, N); 637 } 638 639 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 640 Modifiers Mods = getModifiers(); 641 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 642 assert(isRegKind()); 643 addRegOperands(Inst, N); 644 } 645 646 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 647 assert(!hasIntModifiers()); 648 addRegWithInputModsOperands(Inst, N); 649 } 650 651 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 652 assert(!hasFPModifiers()); 653 addRegWithInputModsOperands(Inst, N); 654 } 655 656 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 657 if (isImm()) 658 addImmOperands(Inst, N); 659 else { 660 assert(isExpr()); 661 Inst.addOperand(MCOperand::createExpr(Expr)); 662 } 663 } 664 665 static void printImmTy(raw_ostream& OS, ImmTy Type) { 666 switch (Type) { 667 case ImmTyNone: OS << "None"; break; 668 case ImmTyGDS: OS << "GDS"; break; 669 case ImmTyLDS: OS << "LDS"; break; 670 case ImmTyOffen: OS << "Offen"; break; 671 case ImmTyIdxen: OS << "Idxen"; break; 672 case ImmTyAddr64: OS << "Addr64"; break; 673 case ImmTyOffset: OS << "Offset"; break; 674 case ImmTyInstOffset: OS << "InstOffset"; break; 675 case ImmTyOffset0: OS << "Offset0"; break; 676 case ImmTyOffset1: OS << "Offset1"; break; 677 case ImmTyGLC: OS << "GLC"; break; 678 case ImmTySLC: OS << "SLC"; break; 679 case ImmTyTFE: OS << "TFE"; break; 680 case ImmTyD16: OS << "D16"; break; 681 case ImmTyFORMAT: OS << "FORMAT"; break; 682 case ImmTyClampSI: OS << "ClampSI"; break; 683 case ImmTyOModSI: OS << "OModSI"; break; 684 case ImmTyDppCtrl: OS << "DppCtrl"; break; 685 case ImmTyDppRowMask: OS << "DppRowMask"; break; 686 case ImmTyDppBankMask: OS << "DppBankMask"; break; 687 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 688 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 689 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 690 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 691 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 692 case ImmTyDMask: OS << "DMask"; break; 693 case ImmTyUNorm: OS << "UNorm"; break; 694 case ImmTyDA: OS << "DA"; break; 695 case ImmTyR128A16: OS << "R128A16"; break; 696 case ImmTyLWE: OS << "LWE"; break; 697 case ImmTyOff: OS << "Off"; break; 698 case ImmTyExpTgt: OS << "ExpTgt"; break; 699 case ImmTyExpCompr: OS << "ExpCompr"; break; 700 case ImmTyExpVM: OS << "ExpVM"; break; 701 case ImmTyHwreg: OS << "Hwreg"; break; 702 case ImmTySendMsg: OS << "SendMsg"; break; 703 case ImmTyInterpSlot: OS << "InterpSlot"; break; 704 case ImmTyInterpAttr: OS << "InterpAttr"; break; 705 case ImmTyAttrChan: OS << "AttrChan"; break; 706 case ImmTyOpSel: OS << "OpSel"; break; 707 case ImmTyOpSelHi: OS << "OpSelHi"; break; 708 case ImmTyNegLo: OS << "NegLo"; break; 709 case ImmTyNegHi: OS << "NegHi"; break; 710 case ImmTySwizzle: OS << "Swizzle"; break; 711 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 712 case ImmTyHigh: OS << "High"; break; 713 case ImmTyEndpgm: 714 OS << "Endpgm"; 715 break; 716 } 717 } 718 719 void print(raw_ostream &OS) const override { 720 switch (Kind) { 721 case Register: 722 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 723 break; 724 case Immediate: 725 OS << '<' << getImm(); 726 if (getImmTy() != ImmTyNone) { 727 OS << " type: "; printImmTy(OS, getImmTy()); 728 } 729 OS << " mods: " << Imm.Mods << '>'; 730 break; 731 case Token: 732 OS << '\'' << getToken() << '\''; 733 break; 734 case Expression: 735 OS << "<expr " << *Expr << '>'; 736 break; 737 } 738 } 739 740 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 741 int64_t Val, SMLoc Loc, 742 ImmTy Type = ImmTyNone, 743 bool IsFPImm = false) { 744 auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser); 745 Op->Imm.Val = Val; 746 Op->Imm.IsFPImm = IsFPImm; 747 Op->Imm.Type = Type; 748 Op->Imm.Mods = Modifiers(); 749 Op->StartLoc = Loc; 750 Op->EndLoc = Loc; 751 return Op; 752 } 753 754 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 755 StringRef Str, SMLoc Loc, 756 bool HasExplicitEncodingSize = true) { 757 auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser); 758 Res->Tok.Data = Str.data(); 759 Res->Tok.Length = Str.size(); 760 Res->StartLoc = Loc; 761 Res->EndLoc = Loc; 762 return Res; 763 } 764 765 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 766 unsigned RegNo, SMLoc S, 767 SMLoc E, 768 bool ForceVOP3) { 769 auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser); 770 Op->Reg.RegNo = RegNo; 771 Op->Reg.Mods = Modifiers(); 772 Op->Reg.IsForcedVOP3 = ForceVOP3; 773 Op->StartLoc = S; 774 Op->EndLoc = E; 775 return Op; 776 } 777 778 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 779 const class MCExpr *Expr, SMLoc S) { 780 auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser); 781 Op->Expr = Expr; 782 Op->StartLoc = S; 783 Op->EndLoc = S; 784 return Op; 785 } 786 }; 787 788 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 789 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 790 return OS; 791 } 792 793 //===----------------------------------------------------------------------===// 794 // AsmParser 795 //===----------------------------------------------------------------------===// 796 797 // Holds info related to the current kernel, e.g. count of SGPRs used. 798 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 799 // .amdgpu_hsa_kernel or at EOF. 800 class KernelScopeInfo { 801 int SgprIndexUnusedMin = -1; 802 int VgprIndexUnusedMin = -1; 803 MCContext *Ctx = nullptr; 804 805 void usesSgprAt(int i) { 806 if (i >= SgprIndexUnusedMin) { 807 SgprIndexUnusedMin = ++i; 808 if (Ctx) { 809 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 810 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 811 } 812 } 813 } 814 815 void usesVgprAt(int i) { 816 if (i >= VgprIndexUnusedMin) { 817 VgprIndexUnusedMin = ++i; 818 if (Ctx) { 819 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 820 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 821 } 822 } 823 } 824 825 public: 826 KernelScopeInfo() = default; 827 828 void initialize(MCContext &Context) { 829 Ctx = &Context; 830 usesSgprAt(SgprIndexUnusedMin = -1); 831 usesVgprAt(VgprIndexUnusedMin = -1); 832 } 833 834 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 835 switch (RegKind) { 836 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 837 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 838 default: break; 839 } 840 } 841 }; 842 843 class AMDGPUAsmParser : public MCTargetAsmParser { 844 MCAsmParser &Parser; 845 846 // Number of extra operands parsed after the first optional operand. 847 // This may be necessary to skip hardcoded mandatory operands. 848 static const unsigned MAX_OPR_LOOKAHEAD = 8; 849 850 unsigned ForcedEncodingSize = 0; 851 bool ForcedDPP = false; 852 bool ForcedSDWA = false; 853 KernelScopeInfo KernelScope; 854 855 /// @name Auto-generated Match Functions 856 /// { 857 858 #define GET_ASSEMBLER_HEADER 859 #include "AMDGPUGenAsmMatcher.inc" 860 861 /// } 862 863 private: 864 bool ParseAsAbsoluteExpression(uint32_t &Ret); 865 bool OutOfRangeError(SMRange Range); 866 /// Calculate VGPR/SGPR blocks required for given target, reserved 867 /// registers, and user-specified NextFreeXGPR values. 868 /// 869 /// \param Features [in] Target features, used for bug corrections. 870 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 871 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 872 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 873 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 874 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 875 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 876 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 877 /// \param VGPRBlocks [out] Result VGPR block count. 878 /// \param SGPRBlocks [out] Result SGPR block count. 879 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 880 bool FlatScrUsed, bool XNACKUsed, 881 unsigned NextFreeVGPR, SMRange VGPRRange, 882 unsigned NextFreeSGPR, SMRange SGPRRange, 883 unsigned &VGPRBlocks, unsigned &SGPRBlocks); 884 bool ParseDirectiveAMDGCNTarget(); 885 bool ParseDirectiveAMDHSAKernel(); 886 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 887 bool ParseDirectiveHSACodeObjectVersion(); 888 bool ParseDirectiveHSACodeObjectISA(); 889 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 890 bool ParseDirectiveAMDKernelCodeT(); 891 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 892 bool ParseDirectiveAMDGPUHsaKernel(); 893 894 bool ParseDirectiveISAVersion(); 895 bool ParseDirectiveHSAMetadata(); 896 bool ParseDirectivePALMetadataBegin(); 897 bool ParseDirectivePALMetadata(); 898 899 /// Common code to parse out a block of text (typically YAML) between start and 900 /// end directives. 901 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 902 const char *AssemblerDirectiveEnd, 903 std::string &CollectString); 904 905 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 906 RegisterKind RegKind, unsigned Reg1, 907 unsigned RegNum); 908 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 909 unsigned& RegNum, unsigned& RegWidth, 910 unsigned *DwordRegIndex); 911 bool isRegister(); 912 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 913 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 914 void initializeGprCountSymbol(RegisterKind RegKind); 915 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 916 unsigned RegWidth); 917 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 918 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 919 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 920 bool IsGdsHardcoded); 921 922 public: 923 enum AMDGPUMatchResultTy { 924 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 925 }; 926 927 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 928 929 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 930 const MCInstrInfo &MII, 931 const MCTargetOptions &Options) 932 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 933 MCAsmParserExtension::Initialize(Parser); 934 935 if (getFeatureBits().none()) { 936 // Set default features. 937 copySTI().ToggleFeature("southern-islands"); 938 } 939 940 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 941 942 { 943 // TODO: make those pre-defined variables read-only. 944 // Currently there is none suitable machinery in the core llvm-mc for this. 945 // MCSymbol::isRedefinable is intended for another purpose, and 946 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 947 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 948 MCContext &Ctx = getContext(); 949 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 950 MCSymbol *Sym = 951 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 952 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 953 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 954 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 955 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 956 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 957 } else { 958 MCSymbol *Sym = 959 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 960 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 961 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 962 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 963 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 964 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 965 } 966 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 967 initializeGprCountSymbol(IS_VGPR); 968 initializeGprCountSymbol(IS_SGPR); 969 } else 970 KernelScope.initialize(getContext()); 971 } 972 } 973 974 bool hasXNACK() const { 975 return AMDGPU::hasXNACK(getSTI()); 976 } 977 978 bool hasMIMG_R128() const { 979 return AMDGPU::hasMIMG_R128(getSTI()); 980 } 981 982 bool hasPackedD16() const { 983 return AMDGPU::hasPackedD16(getSTI()); 984 } 985 986 bool isSI() const { 987 return AMDGPU::isSI(getSTI()); 988 } 989 990 bool isCI() const { 991 return AMDGPU::isCI(getSTI()); 992 } 993 994 bool isVI() const { 995 return AMDGPU::isVI(getSTI()); 996 } 997 998 bool isGFX9() const { 999 return AMDGPU::isGFX9(getSTI()); 1000 } 1001 1002 bool hasInv2PiInlineImm() const { 1003 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1004 } 1005 1006 bool hasFlatOffsets() const { 1007 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1008 } 1009 1010 bool hasSGPR102_SGPR103() const { 1011 return !isVI(); 1012 } 1013 1014 bool hasIntClamp() const { 1015 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1016 } 1017 1018 AMDGPUTargetStreamer &getTargetStreamer() { 1019 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1020 return static_cast<AMDGPUTargetStreamer &>(TS); 1021 } 1022 1023 const MCRegisterInfo *getMRI() const { 1024 // We need this const_cast because for some reason getContext() is not const 1025 // in MCAsmParser. 1026 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1027 } 1028 1029 const MCInstrInfo *getMII() const { 1030 return &MII; 1031 } 1032 1033 const FeatureBitset &getFeatureBits() const { 1034 return getSTI().getFeatureBits(); 1035 } 1036 1037 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1038 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1039 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1040 1041 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1042 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1043 bool isForcedDPP() const { return ForcedDPP; } 1044 bool isForcedSDWA() const { return ForcedSDWA; } 1045 ArrayRef<unsigned> getMatchedVariants() const; 1046 1047 std::unique_ptr<AMDGPUOperand> parseRegister(); 1048 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1049 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1050 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1051 unsigned Kind) override; 1052 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1053 OperandVector &Operands, MCStreamer &Out, 1054 uint64_t &ErrorInfo, 1055 bool MatchingInlineAsm) override; 1056 bool ParseDirective(AsmToken DirectiveID) override; 1057 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic); 1058 StringRef parseMnemonicSuffix(StringRef Name); 1059 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1060 SMLoc NameLoc, OperandVector &Operands) override; 1061 //bool ProcessInstruction(MCInst &Inst); 1062 1063 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1064 1065 OperandMatchResultTy 1066 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1067 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1068 bool (*ConvertResult)(int64_t &) = nullptr); 1069 1070 OperandMatchResultTy parseOperandArrayWithPrefix( 1071 const char *Prefix, 1072 OperandVector &Operands, 1073 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1074 bool (*ConvertResult)(int64_t&) = nullptr); 1075 1076 OperandMatchResultTy 1077 parseNamedBit(const char *Name, OperandVector &Operands, 1078 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1079 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1080 StringRef &Value); 1081 1082 bool parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier = false); 1083 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1084 OperandMatchResultTy parseReg(OperandVector &Operands); 1085 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool AbsMod = false); 1086 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1087 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1088 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1089 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1090 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1091 OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands); 1092 1093 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1094 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1095 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1096 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1097 1098 bool parseCnt(int64_t &IntVal); 1099 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1100 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1101 1102 private: 1103 struct OperandInfoTy { 1104 int64_t Id; 1105 bool IsSymbolic = false; 1106 1107 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1108 }; 1109 1110 bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId); 1111 bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1112 1113 void errorExpTgt(); 1114 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1115 1116 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc); 1117 bool validateSOPLiteral(const MCInst &Inst) const; 1118 bool validateConstantBusLimitations(const MCInst &Inst); 1119 bool validateEarlyClobberLimitations(const MCInst &Inst); 1120 bool validateIntClampSupported(const MCInst &Inst); 1121 bool validateMIMGAtomicDMask(const MCInst &Inst); 1122 bool validateMIMGGatherDMask(const MCInst &Inst); 1123 bool validateMIMGDataSize(const MCInst &Inst); 1124 bool validateMIMGD16(const MCInst &Inst); 1125 bool validateLdsDirect(const MCInst &Inst); 1126 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1127 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1128 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1129 1130 bool isId(const StringRef Id) const; 1131 bool isId(const AsmToken &Token, const StringRef Id) const; 1132 bool isToken(const AsmToken::TokenKind Kind) const; 1133 bool trySkipId(const StringRef Id); 1134 bool trySkipToken(const AsmToken::TokenKind Kind); 1135 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1136 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1137 AsmToken::TokenKind getTokenKind() const; 1138 bool parseExpr(int64_t &Imm); 1139 StringRef getTokenStr() const; 1140 AsmToken peekToken(); 1141 AsmToken getToken() const; 1142 SMLoc getLoc() const; 1143 void lex(); 1144 1145 public: 1146 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1147 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1148 1149 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1150 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1151 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1152 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1153 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1154 1155 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1156 const unsigned MinVal, 1157 const unsigned MaxVal, 1158 const StringRef ErrMsg); 1159 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1160 bool parseSwizzleOffset(int64_t &Imm); 1161 bool parseSwizzleMacro(int64_t &Imm); 1162 bool parseSwizzleQuadPerm(int64_t &Imm); 1163 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1164 bool parseSwizzleBroadcast(int64_t &Imm); 1165 bool parseSwizzleSwap(int64_t &Imm); 1166 bool parseSwizzleReverse(int64_t &Imm); 1167 1168 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1169 int64_t parseGPRIdxMacro(); 1170 1171 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1172 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1173 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1174 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1175 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1176 1177 AMDGPUOperand::Ptr defaultGLC() const; 1178 AMDGPUOperand::Ptr defaultSLC() const; 1179 1180 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1181 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1182 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1183 AMDGPUOperand::Ptr defaultOffsetU12() const; 1184 AMDGPUOperand::Ptr defaultOffsetS13() const; 1185 1186 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1187 1188 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1189 OptionalImmIndexMap &OptionalIdx); 1190 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1191 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1192 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1193 1194 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1195 1196 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1197 bool IsAtomic = false); 1198 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1199 1200 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1201 AMDGPUOperand::Ptr defaultRowMask() const; 1202 AMDGPUOperand::Ptr defaultBankMask() const; 1203 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1204 void cvtDPP(MCInst &Inst, const OperandVector &Operands); 1205 1206 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1207 AMDGPUOperand::ImmTy Type); 1208 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1209 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1210 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1211 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1212 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1213 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1214 uint64_t BasicInstType, bool skipVcc = false); 1215 1216 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1217 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1218 }; 1219 1220 struct OptionalOperand { 1221 const char *Name; 1222 AMDGPUOperand::ImmTy Type; 1223 bool IsBit; 1224 bool (*ConvertResult)(int64_t&); 1225 }; 1226 1227 } // end anonymous namespace 1228 1229 // May be called with integer type with equivalent bitwidth. 1230 static const fltSemantics *getFltSemantics(unsigned Size) { 1231 switch (Size) { 1232 case 4: 1233 return &APFloat::IEEEsingle(); 1234 case 8: 1235 return &APFloat::IEEEdouble(); 1236 case 2: 1237 return &APFloat::IEEEhalf(); 1238 default: 1239 llvm_unreachable("unsupported fp type"); 1240 } 1241 } 1242 1243 static const fltSemantics *getFltSemantics(MVT VT) { 1244 return getFltSemantics(VT.getSizeInBits() / 8); 1245 } 1246 1247 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1248 switch (OperandType) { 1249 case AMDGPU::OPERAND_REG_IMM_INT32: 1250 case AMDGPU::OPERAND_REG_IMM_FP32: 1251 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1252 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1253 return &APFloat::IEEEsingle(); 1254 case AMDGPU::OPERAND_REG_IMM_INT64: 1255 case AMDGPU::OPERAND_REG_IMM_FP64: 1256 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1257 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1258 return &APFloat::IEEEdouble(); 1259 case AMDGPU::OPERAND_REG_IMM_INT16: 1260 case AMDGPU::OPERAND_REG_IMM_FP16: 1261 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1262 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1263 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1264 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1265 return &APFloat::IEEEhalf(); 1266 default: 1267 llvm_unreachable("unsupported fp type"); 1268 } 1269 } 1270 1271 //===----------------------------------------------------------------------===// 1272 // Operand 1273 //===----------------------------------------------------------------------===// 1274 1275 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1276 bool Lost; 1277 1278 // Convert literal to single precision 1279 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1280 APFloat::rmNearestTiesToEven, 1281 &Lost); 1282 // We allow precision lost but not overflow or underflow 1283 if (Status != APFloat::opOK && 1284 Lost && 1285 ((Status & APFloat::opOverflow) != 0 || 1286 (Status & APFloat::opUnderflow) != 0)) { 1287 return false; 1288 } 1289 1290 return true; 1291 } 1292 1293 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1294 return isUIntN(Size, Val) || isIntN(Size, Val); 1295 } 1296 1297 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1298 1299 // This is a hack to enable named inline values like 1300 // shared_base with both 32-bit and 64-bit operands. 1301 // Note that these values are defined as 1302 // 32-bit operands only. 1303 if (isInlineValue()) { 1304 return true; 1305 } 1306 1307 if (!isImmTy(ImmTyNone)) { 1308 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1309 return false; 1310 } 1311 // TODO: We should avoid using host float here. It would be better to 1312 // check the float bit values which is what a few other places do. 1313 // We've had bot failures before due to weird NaN support on mips hosts. 1314 1315 APInt Literal(64, Imm.Val); 1316 1317 if (Imm.IsFPImm) { // We got fp literal token 1318 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1319 return AMDGPU::isInlinableLiteral64(Imm.Val, 1320 AsmParser->hasInv2PiInlineImm()); 1321 } 1322 1323 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1324 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1325 return false; 1326 1327 if (type.getScalarSizeInBits() == 16) { 1328 return AMDGPU::isInlinableLiteral16( 1329 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1330 AsmParser->hasInv2PiInlineImm()); 1331 } 1332 1333 // Check if single precision literal is inlinable 1334 return AMDGPU::isInlinableLiteral32( 1335 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1336 AsmParser->hasInv2PiInlineImm()); 1337 } 1338 1339 // We got int literal token. 1340 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1341 return AMDGPU::isInlinableLiteral64(Imm.Val, 1342 AsmParser->hasInv2PiInlineImm()); 1343 } 1344 1345 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1346 return false; 1347 } 1348 1349 if (type.getScalarSizeInBits() == 16) { 1350 return AMDGPU::isInlinableLiteral16( 1351 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1352 AsmParser->hasInv2PiInlineImm()); 1353 } 1354 1355 return AMDGPU::isInlinableLiteral32( 1356 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1357 AsmParser->hasInv2PiInlineImm()); 1358 } 1359 1360 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1361 // Check that this immediate can be added as literal 1362 if (!isImmTy(ImmTyNone)) { 1363 return false; 1364 } 1365 1366 if (!Imm.IsFPImm) { 1367 // We got int literal token. 1368 1369 if (type == MVT::f64 && hasFPModifiers()) { 1370 // Cannot apply fp modifiers to int literals preserving the same semantics 1371 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1372 // disable these cases. 1373 return false; 1374 } 1375 1376 unsigned Size = type.getSizeInBits(); 1377 if (Size == 64) 1378 Size = 32; 1379 1380 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1381 // types. 1382 return isSafeTruncation(Imm.Val, Size); 1383 } 1384 1385 // We got fp literal token 1386 if (type == MVT::f64) { // Expected 64-bit fp operand 1387 // We would set low 64-bits of literal to zeroes but we accept this literals 1388 return true; 1389 } 1390 1391 if (type == MVT::i64) { // Expected 64-bit int operand 1392 // We don't allow fp literals in 64-bit integer instructions. It is 1393 // unclear how we should encode them. 1394 return false; 1395 } 1396 1397 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1398 return canLosslesslyConvertToFPType(FPLiteral, type); 1399 } 1400 1401 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1402 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1403 } 1404 1405 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1406 if (AsmParser->isVI()) 1407 return isVReg32(); 1408 else if (AsmParser->isGFX9()) 1409 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1410 else 1411 return false; 1412 } 1413 1414 bool AMDGPUOperand::isSDWAFP16Operand() const { 1415 return isSDWAOperand(MVT::f16); 1416 } 1417 1418 bool AMDGPUOperand::isSDWAFP32Operand() const { 1419 return isSDWAOperand(MVT::f32); 1420 } 1421 1422 bool AMDGPUOperand::isSDWAInt16Operand() const { 1423 return isSDWAOperand(MVT::i16); 1424 } 1425 1426 bool AMDGPUOperand::isSDWAInt32Operand() const { 1427 return isSDWAOperand(MVT::i32); 1428 } 1429 1430 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1431 { 1432 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1433 assert(Size == 2 || Size == 4 || Size == 8); 1434 1435 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1436 1437 if (Imm.Mods.Abs) { 1438 Val &= ~FpSignMask; 1439 } 1440 if (Imm.Mods.Neg) { 1441 Val ^= FpSignMask; 1442 } 1443 1444 return Val; 1445 } 1446 1447 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1448 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1449 Inst.getNumOperands())) { 1450 addLiteralImmOperand(Inst, Imm.Val, 1451 ApplyModifiers & 1452 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1453 } else { 1454 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1455 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1456 } 1457 } 1458 1459 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1460 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1461 auto OpNum = Inst.getNumOperands(); 1462 // Check that this operand accepts literals 1463 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1464 1465 if (ApplyModifiers) { 1466 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1467 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1468 Val = applyInputFPModifiers(Val, Size); 1469 } 1470 1471 APInt Literal(64, Val); 1472 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1473 1474 if (Imm.IsFPImm) { // We got fp literal token 1475 switch (OpTy) { 1476 case AMDGPU::OPERAND_REG_IMM_INT64: 1477 case AMDGPU::OPERAND_REG_IMM_FP64: 1478 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1479 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1480 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1481 AsmParser->hasInv2PiInlineImm())) { 1482 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1483 return; 1484 } 1485 1486 // Non-inlineable 1487 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1488 // For fp operands we check if low 32 bits are zeros 1489 if (Literal.getLoBits(32) != 0) { 1490 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1491 "Can't encode literal as exact 64-bit floating-point operand. " 1492 "Low 32-bits will be set to zero"); 1493 } 1494 1495 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1496 return; 1497 } 1498 1499 // We don't allow fp literals in 64-bit integer instructions. It is 1500 // unclear how we should encode them. This case should be checked earlier 1501 // in predicate methods (isLiteralImm()) 1502 llvm_unreachable("fp literal in 64-bit integer instruction."); 1503 1504 case AMDGPU::OPERAND_REG_IMM_INT32: 1505 case AMDGPU::OPERAND_REG_IMM_FP32: 1506 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1507 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1508 case AMDGPU::OPERAND_REG_IMM_INT16: 1509 case AMDGPU::OPERAND_REG_IMM_FP16: 1510 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1511 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1512 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1513 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1514 bool lost; 1515 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1516 // Convert literal to single precision 1517 FPLiteral.convert(*getOpFltSemantics(OpTy), 1518 APFloat::rmNearestTiesToEven, &lost); 1519 // We allow precision lost but not overflow or underflow. This should be 1520 // checked earlier in isLiteralImm() 1521 1522 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1523 Inst.addOperand(MCOperand::createImm(ImmVal)); 1524 return; 1525 } 1526 default: 1527 llvm_unreachable("invalid operand size"); 1528 } 1529 1530 return; 1531 } 1532 1533 // We got int literal token. 1534 // Only sign extend inline immediates. 1535 switch (OpTy) { 1536 case AMDGPU::OPERAND_REG_IMM_INT32: 1537 case AMDGPU::OPERAND_REG_IMM_FP32: 1538 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1539 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1540 if (isSafeTruncation(Val, 32) && 1541 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1542 AsmParser->hasInv2PiInlineImm())) { 1543 Inst.addOperand(MCOperand::createImm(Val)); 1544 return; 1545 } 1546 1547 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1548 return; 1549 1550 case AMDGPU::OPERAND_REG_IMM_INT64: 1551 case AMDGPU::OPERAND_REG_IMM_FP64: 1552 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1553 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1554 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1555 Inst.addOperand(MCOperand::createImm(Val)); 1556 return; 1557 } 1558 1559 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1560 return; 1561 1562 case AMDGPU::OPERAND_REG_IMM_INT16: 1563 case AMDGPU::OPERAND_REG_IMM_FP16: 1564 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1565 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1566 if (isSafeTruncation(Val, 16) && 1567 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1568 AsmParser->hasInv2PiInlineImm())) { 1569 Inst.addOperand(MCOperand::createImm(Val)); 1570 return; 1571 } 1572 1573 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1574 return; 1575 1576 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1577 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: { 1578 assert(isSafeTruncation(Val, 16)); 1579 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1580 AsmParser->hasInv2PiInlineImm())); 1581 1582 Inst.addOperand(MCOperand::createImm(Val)); 1583 return; 1584 } 1585 default: 1586 llvm_unreachable("invalid operand size"); 1587 } 1588 } 1589 1590 template <unsigned Bitwidth> 1591 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1592 APInt Literal(64, Imm.Val); 1593 1594 if (!Imm.IsFPImm) { 1595 // We got int literal token. 1596 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1597 return; 1598 } 1599 1600 bool Lost; 1601 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1602 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1603 APFloat::rmNearestTiesToEven, &Lost); 1604 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1605 } 1606 1607 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1608 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1609 } 1610 1611 static bool isInlineValue(unsigned Reg) { 1612 switch (Reg) { 1613 case AMDGPU::SRC_SHARED_BASE: 1614 case AMDGPU::SRC_SHARED_LIMIT: 1615 case AMDGPU::SRC_PRIVATE_BASE: 1616 case AMDGPU::SRC_PRIVATE_LIMIT: 1617 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1618 return true; 1619 default: 1620 return false; 1621 } 1622 } 1623 1624 bool AMDGPUOperand::isInlineValue() const { 1625 return isRegKind() && ::isInlineValue(getReg()); 1626 } 1627 1628 //===----------------------------------------------------------------------===// 1629 // AsmParser 1630 //===----------------------------------------------------------------------===// 1631 1632 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1633 if (Is == IS_VGPR) { 1634 switch (RegWidth) { 1635 default: return -1; 1636 case 1: return AMDGPU::VGPR_32RegClassID; 1637 case 2: return AMDGPU::VReg_64RegClassID; 1638 case 3: return AMDGPU::VReg_96RegClassID; 1639 case 4: return AMDGPU::VReg_128RegClassID; 1640 case 8: return AMDGPU::VReg_256RegClassID; 1641 case 16: return AMDGPU::VReg_512RegClassID; 1642 } 1643 } else if (Is == IS_TTMP) { 1644 switch (RegWidth) { 1645 default: return -1; 1646 case 1: return AMDGPU::TTMP_32RegClassID; 1647 case 2: return AMDGPU::TTMP_64RegClassID; 1648 case 4: return AMDGPU::TTMP_128RegClassID; 1649 case 8: return AMDGPU::TTMP_256RegClassID; 1650 case 16: return AMDGPU::TTMP_512RegClassID; 1651 } 1652 } else if (Is == IS_SGPR) { 1653 switch (RegWidth) { 1654 default: return -1; 1655 case 1: return AMDGPU::SGPR_32RegClassID; 1656 case 2: return AMDGPU::SGPR_64RegClassID; 1657 case 4: return AMDGPU::SGPR_128RegClassID; 1658 case 8: return AMDGPU::SGPR_256RegClassID; 1659 case 16: return AMDGPU::SGPR_512RegClassID; 1660 } 1661 } 1662 return -1; 1663 } 1664 1665 static unsigned getSpecialRegForName(StringRef RegName) { 1666 return StringSwitch<unsigned>(RegName) 1667 .Case("exec", AMDGPU::EXEC) 1668 .Case("vcc", AMDGPU::VCC) 1669 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1670 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1671 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1672 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1673 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1674 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1675 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1676 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1677 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1678 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1679 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1680 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1681 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1682 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1683 .Case("m0", AMDGPU::M0) 1684 .Case("scc", AMDGPU::SCC) 1685 .Case("tba", AMDGPU::TBA) 1686 .Case("tma", AMDGPU::TMA) 1687 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1688 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1689 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1690 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1691 .Case("vcc_lo", AMDGPU::VCC_LO) 1692 .Case("vcc_hi", AMDGPU::VCC_HI) 1693 .Case("exec_lo", AMDGPU::EXEC_LO) 1694 .Case("exec_hi", AMDGPU::EXEC_HI) 1695 .Case("tma_lo", AMDGPU::TMA_LO) 1696 .Case("tma_hi", AMDGPU::TMA_HI) 1697 .Case("tba_lo", AMDGPU::TBA_LO) 1698 .Case("tba_hi", AMDGPU::TBA_HI) 1699 .Default(0); 1700 } 1701 1702 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1703 SMLoc &EndLoc) { 1704 auto R = parseRegister(); 1705 if (!R) return true; 1706 assert(R->isReg()); 1707 RegNo = R->getReg(); 1708 StartLoc = R->getStartLoc(); 1709 EndLoc = R->getEndLoc(); 1710 return false; 1711 } 1712 1713 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1714 RegisterKind RegKind, unsigned Reg1, 1715 unsigned RegNum) { 1716 switch (RegKind) { 1717 case IS_SPECIAL: 1718 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1719 Reg = AMDGPU::EXEC; 1720 RegWidth = 2; 1721 return true; 1722 } 1723 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1724 Reg = AMDGPU::FLAT_SCR; 1725 RegWidth = 2; 1726 return true; 1727 } 1728 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1729 Reg = AMDGPU::XNACK_MASK; 1730 RegWidth = 2; 1731 return true; 1732 } 1733 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1734 Reg = AMDGPU::VCC; 1735 RegWidth = 2; 1736 return true; 1737 } 1738 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1739 Reg = AMDGPU::TBA; 1740 RegWidth = 2; 1741 return true; 1742 } 1743 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 1744 Reg = AMDGPU::TMA; 1745 RegWidth = 2; 1746 return true; 1747 } 1748 return false; 1749 case IS_VGPR: 1750 case IS_SGPR: 1751 case IS_TTMP: 1752 if (Reg1 != Reg + RegWidth) { 1753 return false; 1754 } 1755 RegWidth++; 1756 return true; 1757 default: 1758 llvm_unreachable("unexpected register kind"); 1759 } 1760 } 1761 1762 static const StringRef Registers[] = { 1763 { "v" }, 1764 { "s" }, 1765 { "ttmp" }, 1766 }; 1767 1768 bool 1769 AMDGPUAsmParser::isRegister(const AsmToken &Token, 1770 const AsmToken &NextToken) const { 1771 1772 // A list of consecutive registers: [s0,s1,s2,s3] 1773 if (Token.is(AsmToken::LBrac)) 1774 return true; 1775 1776 if (!Token.is(AsmToken::Identifier)) 1777 return false; 1778 1779 // A single register like s0 or a range of registers like s[0:1] 1780 1781 StringRef RegName = Token.getString(); 1782 1783 for (StringRef Reg : Registers) { 1784 if (RegName.startswith(Reg)) { 1785 if (Reg.size() < RegName.size()) { 1786 unsigned RegNum; 1787 // A single register with an index: rXX 1788 if (!RegName.substr(Reg.size()).getAsInteger(10, RegNum)) 1789 return true; 1790 } else { 1791 // A range of registers: r[XX:YY]. 1792 if (NextToken.is(AsmToken::LBrac)) 1793 return true; 1794 } 1795 } 1796 } 1797 1798 return getSpecialRegForName(RegName); 1799 } 1800 1801 bool 1802 AMDGPUAsmParser::isRegister() 1803 { 1804 return isRegister(getToken(), peekToken()); 1805 } 1806 1807 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1808 unsigned &RegNum, unsigned &RegWidth, 1809 unsigned *DwordRegIndex) { 1810 if (DwordRegIndex) { *DwordRegIndex = 0; } 1811 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 1812 if (getLexer().is(AsmToken::Identifier)) { 1813 StringRef RegName = Parser.getTok().getString(); 1814 if ((Reg = getSpecialRegForName(RegName))) { 1815 Parser.Lex(); 1816 RegKind = IS_SPECIAL; 1817 } else { 1818 unsigned RegNumIndex = 0; 1819 if (RegName[0] == 'v') { 1820 RegNumIndex = 1; 1821 RegKind = IS_VGPR; 1822 } else if (RegName[0] == 's') { 1823 RegNumIndex = 1; 1824 RegKind = IS_SGPR; 1825 } else if (RegName.startswith("ttmp")) { 1826 RegNumIndex = strlen("ttmp"); 1827 RegKind = IS_TTMP; 1828 } else { 1829 return false; 1830 } 1831 if (RegName.size() > RegNumIndex) { 1832 // Single 32-bit register: vXX. 1833 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 1834 return false; 1835 Parser.Lex(); 1836 RegWidth = 1; 1837 } else { 1838 // Range of registers: v[XX:YY]. ":YY" is optional. 1839 Parser.Lex(); 1840 int64_t RegLo, RegHi; 1841 if (getLexer().isNot(AsmToken::LBrac)) 1842 return false; 1843 Parser.Lex(); 1844 1845 if (getParser().parseAbsoluteExpression(RegLo)) 1846 return false; 1847 1848 const bool isRBrace = getLexer().is(AsmToken::RBrac); 1849 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 1850 return false; 1851 Parser.Lex(); 1852 1853 if (isRBrace) { 1854 RegHi = RegLo; 1855 } else { 1856 if (getParser().parseAbsoluteExpression(RegHi)) 1857 return false; 1858 1859 if (getLexer().isNot(AsmToken::RBrac)) 1860 return false; 1861 Parser.Lex(); 1862 } 1863 RegNum = (unsigned) RegLo; 1864 RegWidth = (RegHi - RegLo) + 1; 1865 } 1866 } 1867 } else if (getLexer().is(AsmToken::LBrac)) { 1868 // List of consecutive registers: [s0,s1,s2,s3] 1869 Parser.Lex(); 1870 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 1871 return false; 1872 if (RegWidth != 1) 1873 return false; 1874 RegisterKind RegKind1; 1875 unsigned Reg1, RegNum1, RegWidth1; 1876 do { 1877 if (getLexer().is(AsmToken::Comma)) { 1878 Parser.Lex(); 1879 } else if (getLexer().is(AsmToken::RBrac)) { 1880 Parser.Lex(); 1881 break; 1882 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 1883 if (RegWidth1 != 1) { 1884 return false; 1885 } 1886 if (RegKind1 != RegKind) { 1887 return false; 1888 } 1889 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 1890 return false; 1891 } 1892 } else { 1893 return false; 1894 } 1895 } while (true); 1896 } else { 1897 return false; 1898 } 1899 switch (RegKind) { 1900 case IS_SPECIAL: 1901 RegNum = 0; 1902 RegWidth = 1; 1903 break; 1904 case IS_VGPR: 1905 case IS_SGPR: 1906 case IS_TTMP: 1907 { 1908 unsigned Size = 1; 1909 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 1910 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 1911 Size = std::min(RegWidth, 4u); 1912 } 1913 if (RegNum % Size != 0) 1914 return false; 1915 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 1916 RegNum = RegNum / Size; 1917 int RCID = getRegClass(RegKind, RegWidth); 1918 if (RCID == -1) 1919 return false; 1920 const MCRegisterClass RC = TRI->getRegClass(RCID); 1921 if (RegNum >= RC.getNumRegs()) 1922 return false; 1923 Reg = RC.getRegister(RegNum); 1924 break; 1925 } 1926 1927 default: 1928 llvm_unreachable("unexpected register kind"); 1929 } 1930 1931 if (!subtargetHasRegister(*TRI, Reg)) 1932 return false; 1933 return true; 1934 } 1935 1936 Optional<StringRef> 1937 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 1938 switch (RegKind) { 1939 case IS_VGPR: 1940 return StringRef(".amdgcn.next_free_vgpr"); 1941 case IS_SGPR: 1942 return StringRef(".amdgcn.next_free_sgpr"); 1943 default: 1944 return None; 1945 } 1946 } 1947 1948 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 1949 auto SymbolName = getGprCountSymbolName(RegKind); 1950 assert(SymbolName && "initializing invalid register kind"); 1951 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1952 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 1953 } 1954 1955 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 1956 unsigned DwordRegIndex, 1957 unsigned RegWidth) { 1958 // Symbols are only defined for GCN targets 1959 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 1960 return true; 1961 1962 auto SymbolName = getGprCountSymbolName(RegKind); 1963 if (!SymbolName) 1964 return true; 1965 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 1966 1967 int64_t NewMax = DwordRegIndex + RegWidth - 1; 1968 int64_t OldCount; 1969 1970 if (!Sym->isVariable()) 1971 return !Error(getParser().getTok().getLoc(), 1972 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 1973 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 1974 return !Error( 1975 getParser().getTok().getLoc(), 1976 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 1977 1978 if (OldCount <= NewMax) 1979 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 1980 1981 return true; 1982 } 1983 1984 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 1985 const auto &Tok = Parser.getTok(); 1986 SMLoc StartLoc = Tok.getLoc(); 1987 SMLoc EndLoc = Tok.getEndLoc(); 1988 RegisterKind RegKind; 1989 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 1990 1991 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 1992 //FIXME: improve error messages (bug 41303). 1993 Error(StartLoc, "not a valid operand."); 1994 return nullptr; 1995 } 1996 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1997 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 1998 return nullptr; 1999 } else 2000 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2001 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false); 2002 } 2003 2004 bool 2005 AMDGPUAsmParser::parseAbsoluteExpr(int64_t &Val, bool HasSP3AbsModifier) { 2006 if (HasSP3AbsModifier) { 2007 // This is a workaround for handling expressions 2008 // as arguments of SP3 'abs' modifier, for example: 2009 // |1.0| 2010 // |-1| 2011 // |1+x| 2012 // This syntax is not compatible with syntax of standard 2013 // MC expressions (due to the trailing '|'). 2014 2015 SMLoc EndLoc; 2016 const MCExpr *Expr; 2017 SMLoc StartLoc = getLoc(); 2018 2019 if (getParser().parsePrimaryExpr(Expr, EndLoc)) { 2020 return true; 2021 } 2022 2023 if (!Expr->evaluateAsAbsolute(Val)) 2024 return Error(StartLoc, "expected absolute expression"); 2025 2026 return false; 2027 } 2028 2029 return getParser().parseAbsoluteExpression(Val); 2030 } 2031 2032 OperandMatchResultTy 2033 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2034 // TODO: add syntactic sugar for 1/(2*PI) 2035 2036 const auto& Tok = getToken(); 2037 const auto& NextTok = peekToken(); 2038 bool IsReal = Tok.is(AsmToken::Real); 2039 SMLoc S = Tok.getLoc(); 2040 bool Negate = false; 2041 2042 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2043 lex(); 2044 IsReal = true; 2045 Negate = true; 2046 } 2047 2048 if (IsReal) { 2049 // Floating-point expressions are not supported. 2050 // Can only allow floating-point literals with an 2051 // optional sign. 2052 2053 StringRef Num = getTokenStr(); 2054 lex(); 2055 2056 APFloat RealVal(APFloat::IEEEdouble()); 2057 auto roundMode = APFloat::rmNearestTiesToEven; 2058 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2059 return MatchOperand_ParseFail; 2060 } 2061 if (Negate) 2062 RealVal.changeSign(); 2063 2064 Operands.push_back( 2065 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2066 AMDGPUOperand::ImmTyNone, true)); 2067 2068 return MatchOperand_Success; 2069 2070 // FIXME: Should enable arbitrary expressions here 2071 } else if (Tok.is(AsmToken::Integer) || 2072 (Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Integer))){ 2073 2074 int64_t IntVal; 2075 if (parseAbsoluteExpr(IntVal, HasSP3AbsModifier)) 2076 return MatchOperand_ParseFail; 2077 2078 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2079 return MatchOperand_Success; 2080 } 2081 2082 return MatchOperand_NoMatch; 2083 } 2084 2085 OperandMatchResultTy 2086 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2087 if (!isRegister()) 2088 return MatchOperand_NoMatch; 2089 2090 if (auto R = parseRegister()) { 2091 assert(R->isReg()); 2092 R->Reg.IsForcedVOP3 = isForcedVOP3(); 2093 Operands.push_back(std::move(R)); 2094 return MatchOperand_Success; 2095 } 2096 return MatchOperand_ParseFail; 2097 } 2098 2099 OperandMatchResultTy 2100 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool AbsMod) { 2101 auto res = parseReg(Operands); 2102 return (res == MatchOperand_NoMatch)? 2103 parseImm(Operands, AbsMod) : 2104 res; 2105 } 2106 2107 OperandMatchResultTy 2108 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2109 bool AllowImm) { 2110 bool Negate = false, Negate2 = false, Abs = false, Abs2 = false; 2111 2112 if (getLexer().getKind()== AsmToken::Minus) { 2113 const AsmToken NextToken = getLexer().peekTok(); 2114 2115 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2116 if (NextToken.is(AsmToken::Minus)) { 2117 Error(Parser.getTok().getLoc(), "invalid syntax, expected 'neg' modifier"); 2118 return MatchOperand_ParseFail; 2119 } 2120 2121 // '-' followed by an integer literal N should be interpreted as integer 2122 // negation rather than a floating-point NEG modifier applied to N. 2123 // Beside being contr-intuitive, such use of floating-point NEG modifier 2124 // results in different meaning of integer literals used with VOP1/2/C 2125 // and VOP3, for example: 2126 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2127 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2128 // Negative fp literals should be handled likewise for unifomtity 2129 if (!NextToken.is(AsmToken::Integer) && !NextToken.is(AsmToken::Real)) { 2130 Parser.Lex(); 2131 Negate = true; 2132 } 2133 } 2134 2135 if (getLexer().getKind() == AsmToken::Identifier && 2136 Parser.getTok().getString() == "neg") { 2137 if (Negate) { 2138 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2139 return MatchOperand_ParseFail; 2140 } 2141 Parser.Lex(); 2142 Negate2 = true; 2143 if (getLexer().isNot(AsmToken::LParen)) { 2144 Error(Parser.getTok().getLoc(), "expected left paren after neg"); 2145 return MatchOperand_ParseFail; 2146 } 2147 Parser.Lex(); 2148 } 2149 2150 if (getLexer().getKind() == AsmToken::Identifier && 2151 Parser.getTok().getString() == "abs") { 2152 Parser.Lex(); 2153 Abs2 = true; 2154 if (getLexer().isNot(AsmToken::LParen)) { 2155 Error(Parser.getTok().getLoc(), "expected left paren after abs"); 2156 return MatchOperand_ParseFail; 2157 } 2158 Parser.Lex(); 2159 } 2160 2161 if (getLexer().getKind() == AsmToken::Pipe) { 2162 if (Abs2) { 2163 Error(Parser.getTok().getLoc(), "expected register or immediate"); 2164 return MatchOperand_ParseFail; 2165 } 2166 Parser.Lex(); 2167 Abs = true; 2168 } 2169 2170 OperandMatchResultTy Res; 2171 if (AllowImm) { 2172 Res = parseRegOrImm(Operands, Abs); 2173 } else { 2174 Res = parseReg(Operands); 2175 } 2176 if (Res != MatchOperand_Success) { 2177 return Res; 2178 } 2179 2180 AMDGPUOperand::Modifiers Mods; 2181 if (Abs) { 2182 if (getLexer().getKind() != AsmToken::Pipe) { 2183 Error(Parser.getTok().getLoc(), "expected vertical bar"); 2184 return MatchOperand_ParseFail; 2185 } 2186 Parser.Lex(); 2187 Mods.Abs = true; 2188 } 2189 if (Abs2) { 2190 if (getLexer().isNot(AsmToken::RParen)) { 2191 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2192 return MatchOperand_ParseFail; 2193 } 2194 Parser.Lex(); 2195 Mods.Abs = true; 2196 } 2197 2198 if (Negate) { 2199 Mods.Neg = true; 2200 } else if (Negate2) { 2201 if (getLexer().isNot(AsmToken::RParen)) { 2202 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2203 return MatchOperand_ParseFail; 2204 } 2205 Parser.Lex(); 2206 Mods.Neg = true; 2207 } 2208 2209 if (Mods.hasFPModifiers()) { 2210 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2211 Op.setModifiers(Mods); 2212 } 2213 return MatchOperand_Success; 2214 } 2215 2216 OperandMatchResultTy 2217 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2218 bool AllowImm) { 2219 bool Sext = false; 2220 2221 if (getLexer().getKind() == AsmToken::Identifier && 2222 Parser.getTok().getString() == "sext") { 2223 Parser.Lex(); 2224 Sext = true; 2225 if (getLexer().isNot(AsmToken::LParen)) { 2226 Error(Parser.getTok().getLoc(), "expected left paren after sext"); 2227 return MatchOperand_ParseFail; 2228 } 2229 Parser.Lex(); 2230 } 2231 2232 OperandMatchResultTy Res; 2233 if (AllowImm) { 2234 Res = parseRegOrImm(Operands); 2235 } else { 2236 Res = parseReg(Operands); 2237 } 2238 if (Res != MatchOperand_Success) { 2239 return Res; 2240 } 2241 2242 AMDGPUOperand::Modifiers Mods; 2243 if (Sext) { 2244 if (getLexer().isNot(AsmToken::RParen)) { 2245 Error(Parser.getTok().getLoc(), "expected closing parentheses"); 2246 return MatchOperand_ParseFail; 2247 } 2248 Parser.Lex(); 2249 Mods.Sext = true; 2250 } 2251 2252 if (Mods.hasIntModifiers()) { 2253 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2254 Op.setModifiers(Mods); 2255 } 2256 2257 return MatchOperand_Success; 2258 } 2259 2260 OperandMatchResultTy 2261 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2262 return parseRegOrImmWithFPInputMods(Operands, false); 2263 } 2264 2265 OperandMatchResultTy 2266 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2267 return parseRegOrImmWithIntInputMods(Operands, false); 2268 } 2269 2270 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2271 auto Loc = getLoc(); 2272 if (trySkipId("off")) { 2273 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2274 AMDGPUOperand::ImmTyOff, false)); 2275 return MatchOperand_Success; 2276 } 2277 2278 if (!isRegister()) 2279 return MatchOperand_NoMatch; 2280 2281 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2282 if (Reg) { 2283 Operands.push_back(std::move(Reg)); 2284 return MatchOperand_Success; 2285 } 2286 2287 return MatchOperand_ParseFail; 2288 2289 } 2290 2291 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2292 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2293 2294 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2295 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2296 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2297 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2298 return Match_InvalidOperand; 2299 2300 if ((TSFlags & SIInstrFlags::VOP3) && 2301 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2302 getForcedEncodingSize() != 64) 2303 return Match_PreferE32; 2304 2305 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2306 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2307 // v_mac_f32/16 allow only dst_sel == DWORD; 2308 auto OpNum = 2309 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2310 const auto &Op = Inst.getOperand(OpNum); 2311 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2312 return Match_InvalidOperand; 2313 } 2314 } 2315 2316 if ((TSFlags & SIInstrFlags::FLAT) && !hasFlatOffsets()) { 2317 // FIXME: Produces error without correct column reported. 2318 auto OpNum = 2319 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset); 2320 const auto &Op = Inst.getOperand(OpNum); 2321 if (Op.getImm() != 0) 2322 return Match_InvalidOperand; 2323 } 2324 2325 return Match_Success; 2326 } 2327 2328 // What asm variants we should check 2329 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2330 if (getForcedEncodingSize() == 32) { 2331 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2332 return makeArrayRef(Variants); 2333 } 2334 2335 if (isForcedVOP3()) { 2336 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2337 return makeArrayRef(Variants); 2338 } 2339 2340 if (isForcedSDWA()) { 2341 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2342 AMDGPUAsmVariants::SDWA9}; 2343 return makeArrayRef(Variants); 2344 } 2345 2346 if (isForcedDPP()) { 2347 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2348 return makeArrayRef(Variants); 2349 } 2350 2351 static const unsigned Variants[] = { 2352 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2353 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2354 }; 2355 2356 return makeArrayRef(Variants); 2357 } 2358 2359 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2360 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2361 const unsigned Num = Desc.getNumImplicitUses(); 2362 for (unsigned i = 0; i < Num; ++i) { 2363 unsigned Reg = Desc.ImplicitUses[i]; 2364 switch (Reg) { 2365 case AMDGPU::FLAT_SCR: 2366 case AMDGPU::VCC: 2367 case AMDGPU::M0: 2368 return Reg; 2369 default: 2370 break; 2371 } 2372 } 2373 return AMDGPU::NoRegister; 2374 } 2375 2376 // NB: This code is correct only when used to check constant 2377 // bus limitations because GFX7 support no f16 inline constants. 2378 // Note that there are no cases when a GFX7 opcode violates 2379 // constant bus limitations due to the use of an f16 constant. 2380 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2381 unsigned OpIdx) const { 2382 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2383 2384 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2385 return false; 2386 } 2387 2388 const MCOperand &MO = Inst.getOperand(OpIdx); 2389 2390 int64_t Val = MO.getImm(); 2391 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2392 2393 switch (OpSize) { // expected operand size 2394 case 8: 2395 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2396 case 4: 2397 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2398 case 2: { 2399 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2400 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2401 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16) { 2402 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2403 } else { 2404 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2405 } 2406 } 2407 default: 2408 llvm_unreachable("invalid operand size"); 2409 } 2410 } 2411 2412 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2413 const MCOperand &MO = Inst.getOperand(OpIdx); 2414 if (MO.isImm()) { 2415 return !isInlineConstant(Inst, OpIdx); 2416 } 2417 return !MO.isReg() || 2418 isSGPR(mc2PseudoReg(MO.getReg()), getContext().getRegisterInfo()); 2419 } 2420 2421 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2422 const unsigned Opcode = Inst.getOpcode(); 2423 const MCInstrDesc &Desc = MII.get(Opcode); 2424 unsigned ConstantBusUseCount = 0; 2425 2426 if (Desc.TSFlags & 2427 (SIInstrFlags::VOPC | 2428 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2429 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2430 SIInstrFlags::SDWA)) { 2431 // Check special imm operands (used by madmk, etc) 2432 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2433 ++ConstantBusUseCount; 2434 } 2435 2436 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2437 if (SGPRUsed != AMDGPU::NoRegister) { 2438 ++ConstantBusUseCount; 2439 } 2440 2441 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2442 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2443 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2444 2445 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2446 2447 for (int OpIdx : OpIndices) { 2448 if (OpIdx == -1) break; 2449 2450 const MCOperand &MO = Inst.getOperand(OpIdx); 2451 if (usesConstantBus(Inst, OpIdx)) { 2452 if (MO.isReg()) { 2453 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2454 // Pairs of registers with a partial intersections like these 2455 // s0, s[0:1] 2456 // flat_scratch_lo, flat_scratch 2457 // flat_scratch_lo, flat_scratch_hi 2458 // are theoretically valid but they are disabled anyway. 2459 // Note that this code mimics SIInstrInfo::verifyInstruction 2460 if (Reg != SGPRUsed) { 2461 ++ConstantBusUseCount; 2462 } 2463 SGPRUsed = Reg; 2464 } else { // Expression or a literal 2465 ++ConstantBusUseCount; 2466 } 2467 } 2468 } 2469 } 2470 2471 return ConstantBusUseCount <= 1; 2472 } 2473 2474 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2475 const unsigned Opcode = Inst.getOpcode(); 2476 const MCInstrDesc &Desc = MII.get(Opcode); 2477 2478 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2479 if (DstIdx == -1 || 2480 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2481 return true; 2482 } 2483 2484 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2485 2486 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2487 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2488 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2489 2490 assert(DstIdx != -1); 2491 const MCOperand &Dst = Inst.getOperand(DstIdx); 2492 assert(Dst.isReg()); 2493 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2494 2495 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2496 2497 for (int SrcIdx : SrcIndices) { 2498 if (SrcIdx == -1) break; 2499 const MCOperand &Src = Inst.getOperand(SrcIdx); 2500 if (Src.isReg()) { 2501 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2502 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2503 return false; 2504 } 2505 } 2506 } 2507 2508 return true; 2509 } 2510 2511 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2512 2513 const unsigned Opc = Inst.getOpcode(); 2514 const MCInstrDesc &Desc = MII.get(Opc); 2515 2516 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2517 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2518 assert(ClampIdx != -1); 2519 return Inst.getOperand(ClampIdx).getImm() == 0; 2520 } 2521 2522 return true; 2523 } 2524 2525 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2526 2527 const unsigned Opc = Inst.getOpcode(); 2528 const MCInstrDesc &Desc = MII.get(Opc); 2529 2530 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2531 return true; 2532 2533 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2534 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2535 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2536 2537 assert(VDataIdx != -1); 2538 assert(DMaskIdx != -1); 2539 assert(TFEIdx != -1); 2540 2541 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2542 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2543 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2544 if (DMask == 0) 2545 DMask = 1; 2546 2547 unsigned DataSize = 2548 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2549 if (hasPackedD16()) { 2550 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2551 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2552 DataSize = (DataSize + 1) / 2; 2553 } 2554 2555 return (VDataSize / 4) == DataSize + TFESize; 2556 } 2557 2558 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2559 2560 const unsigned Opc = Inst.getOpcode(); 2561 const MCInstrDesc &Desc = MII.get(Opc); 2562 2563 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2564 return true; 2565 if (!Desc.mayLoad() || !Desc.mayStore()) 2566 return true; // Not atomic 2567 2568 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2569 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2570 2571 // This is an incomplete check because image_atomic_cmpswap 2572 // may only use 0x3 and 0xf while other atomic operations 2573 // may use 0x1 and 0x3. However these limitations are 2574 // verified when we check that dmask matches dst size. 2575 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2576 } 2577 2578 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2579 2580 const unsigned Opc = Inst.getOpcode(); 2581 const MCInstrDesc &Desc = MII.get(Opc); 2582 2583 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2584 return true; 2585 2586 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2587 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2588 2589 // GATHER4 instructions use dmask in a different fashion compared to 2590 // other MIMG instructions. The only useful DMASK values are 2591 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2592 // (red,red,red,red) etc.) The ISA document doesn't mention 2593 // this. 2594 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2595 } 2596 2597 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2598 2599 const unsigned Opc = Inst.getOpcode(); 2600 const MCInstrDesc &Desc = MII.get(Opc); 2601 2602 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2603 return true; 2604 2605 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2606 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2607 if (isCI() || isSI()) 2608 return false; 2609 } 2610 2611 return true; 2612 } 2613 2614 static bool IsRevOpcode(const unsigned Opcode) 2615 { 2616 switch (Opcode) { 2617 case AMDGPU::V_SUBREV_F32_e32: 2618 case AMDGPU::V_SUBREV_F32_e64: 2619 case AMDGPU::V_SUBREV_F32_e32_si: 2620 case AMDGPU::V_SUBREV_F32_e32_vi: 2621 case AMDGPU::V_SUBREV_F32_e64_si: 2622 case AMDGPU::V_SUBREV_F32_e64_vi: 2623 case AMDGPU::V_SUBREV_I32_e32: 2624 case AMDGPU::V_SUBREV_I32_e64: 2625 case AMDGPU::V_SUBREV_I32_e32_si: 2626 case AMDGPU::V_SUBREV_I32_e64_si: 2627 case AMDGPU::V_SUBBREV_U32_e32: 2628 case AMDGPU::V_SUBBREV_U32_e64: 2629 case AMDGPU::V_SUBBREV_U32_e32_si: 2630 case AMDGPU::V_SUBBREV_U32_e32_vi: 2631 case AMDGPU::V_SUBBREV_U32_e64_si: 2632 case AMDGPU::V_SUBBREV_U32_e64_vi: 2633 case AMDGPU::V_SUBREV_U32_e32: 2634 case AMDGPU::V_SUBREV_U32_e64: 2635 case AMDGPU::V_SUBREV_U32_e32_gfx9: 2636 case AMDGPU::V_SUBREV_U32_e32_vi: 2637 case AMDGPU::V_SUBREV_U32_e64_gfx9: 2638 case AMDGPU::V_SUBREV_U32_e64_vi: 2639 case AMDGPU::V_SUBREV_F16_e32: 2640 case AMDGPU::V_SUBREV_F16_e64: 2641 case AMDGPU::V_SUBREV_F16_e32_vi: 2642 case AMDGPU::V_SUBREV_F16_e64_vi: 2643 case AMDGPU::V_SUBREV_U16_e32: 2644 case AMDGPU::V_SUBREV_U16_e64: 2645 case AMDGPU::V_SUBREV_U16_e32_vi: 2646 case AMDGPU::V_SUBREV_U16_e64_vi: 2647 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 2648 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 2649 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 2650 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 2651 case AMDGPU::V_LSHLREV_B32_e32_si: 2652 case AMDGPU::V_LSHLREV_B32_e64_si: 2653 case AMDGPU::V_LSHLREV_B16_e32_vi: 2654 case AMDGPU::V_LSHLREV_B16_e64_vi: 2655 case AMDGPU::V_LSHLREV_B32_e32_vi: 2656 case AMDGPU::V_LSHLREV_B32_e64_vi: 2657 case AMDGPU::V_LSHLREV_B64_vi: 2658 case AMDGPU::V_LSHRREV_B32_e32_si: 2659 case AMDGPU::V_LSHRREV_B32_e64_si: 2660 case AMDGPU::V_LSHRREV_B16_e32_vi: 2661 case AMDGPU::V_LSHRREV_B16_e64_vi: 2662 case AMDGPU::V_LSHRREV_B32_e32_vi: 2663 case AMDGPU::V_LSHRREV_B32_e64_vi: 2664 case AMDGPU::V_LSHRREV_B64_vi: 2665 case AMDGPU::V_ASHRREV_I32_e64_si: 2666 case AMDGPU::V_ASHRREV_I32_e32_si: 2667 case AMDGPU::V_ASHRREV_I16_e32_vi: 2668 case AMDGPU::V_ASHRREV_I16_e64_vi: 2669 case AMDGPU::V_ASHRREV_I32_e32_vi: 2670 case AMDGPU::V_ASHRREV_I32_e64_vi: 2671 case AMDGPU::V_ASHRREV_I64_vi: 2672 case AMDGPU::V_PK_LSHLREV_B16_vi: 2673 case AMDGPU::V_PK_LSHRREV_B16_vi: 2674 case AMDGPU::V_PK_ASHRREV_I16_vi: 2675 return true; 2676 default: 2677 return false; 2678 } 2679 } 2680 2681 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 2682 2683 using namespace SIInstrFlags; 2684 const unsigned Opcode = Inst.getOpcode(); 2685 const MCInstrDesc &Desc = MII.get(Opcode); 2686 2687 // lds_direct register is defined so that it can be used 2688 // with 9-bit operands only. Ignore encodings which do not accept these. 2689 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 2690 return true; 2691 2692 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2693 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2694 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2695 2696 const int SrcIndices[] = { Src1Idx, Src2Idx }; 2697 2698 // lds_direct cannot be specified as either src1 or src2. 2699 for (int SrcIdx : SrcIndices) { 2700 if (SrcIdx == -1) break; 2701 const MCOperand &Src = Inst.getOperand(SrcIdx); 2702 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 2703 return false; 2704 } 2705 } 2706 2707 if (Src0Idx == -1) 2708 return true; 2709 2710 const MCOperand &Src = Inst.getOperand(Src0Idx); 2711 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 2712 return true; 2713 2714 // lds_direct is specified as src0. Check additional limitations. 2715 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 2716 } 2717 2718 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 2719 unsigned Opcode = Inst.getOpcode(); 2720 const MCInstrDesc &Desc = MII.get(Opcode); 2721 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 2722 return true; 2723 2724 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2725 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2726 2727 const int OpIndices[] = { Src0Idx, Src1Idx }; 2728 2729 unsigned NumLiterals = 0; 2730 uint32_t LiteralValue; 2731 2732 for (int OpIdx : OpIndices) { 2733 if (OpIdx == -1) break; 2734 2735 const MCOperand &MO = Inst.getOperand(OpIdx); 2736 if (MO.isImm() && 2737 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 2738 AMDGPU::isSISrcOperand(Desc, OpIdx) && 2739 !isInlineConstant(Inst, OpIdx)) { 2740 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 2741 if (NumLiterals == 0 || LiteralValue != Value) { 2742 LiteralValue = Value; 2743 ++NumLiterals; 2744 } 2745 } 2746 } 2747 2748 return NumLiterals <= 1; 2749 } 2750 2751 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 2752 const SMLoc &IDLoc) { 2753 if (!validateLdsDirect(Inst)) { 2754 Error(IDLoc, 2755 "invalid use of lds_direct"); 2756 return false; 2757 } 2758 if (!validateSOPLiteral(Inst)) { 2759 Error(IDLoc, 2760 "only one literal operand is allowed"); 2761 return false; 2762 } 2763 if (!validateConstantBusLimitations(Inst)) { 2764 Error(IDLoc, 2765 "invalid operand (violates constant bus restrictions)"); 2766 return false; 2767 } 2768 if (!validateEarlyClobberLimitations(Inst)) { 2769 Error(IDLoc, 2770 "destination must be different than all sources"); 2771 return false; 2772 } 2773 if (!validateIntClampSupported(Inst)) { 2774 Error(IDLoc, 2775 "integer clamping is not supported on this GPU"); 2776 return false; 2777 } 2778 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 2779 if (!validateMIMGD16(Inst)) { 2780 Error(IDLoc, 2781 "d16 modifier is not supported on this GPU"); 2782 return false; 2783 } 2784 if (!validateMIMGDataSize(Inst)) { 2785 Error(IDLoc, 2786 "image data size does not match dmask and tfe"); 2787 return false; 2788 } 2789 if (!validateMIMGAtomicDMask(Inst)) { 2790 Error(IDLoc, 2791 "invalid atomic image dmask"); 2792 return false; 2793 } 2794 if (!validateMIMGGatherDMask(Inst)) { 2795 Error(IDLoc, 2796 "invalid image_gather dmask: only one bit must be set"); 2797 return false; 2798 } 2799 2800 return true; 2801 } 2802 2803 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 2804 const FeatureBitset &FBS, 2805 unsigned VariantID = 0); 2806 2807 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 2808 OperandVector &Operands, 2809 MCStreamer &Out, 2810 uint64_t &ErrorInfo, 2811 bool MatchingInlineAsm) { 2812 MCInst Inst; 2813 unsigned Result = Match_Success; 2814 for (auto Variant : getMatchedVariants()) { 2815 uint64_t EI; 2816 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 2817 Variant); 2818 // We order match statuses from least to most specific. We use most specific 2819 // status as resulting 2820 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 2821 if ((R == Match_Success) || 2822 (R == Match_PreferE32) || 2823 (R == Match_MissingFeature && Result != Match_PreferE32) || 2824 (R == Match_InvalidOperand && Result != Match_MissingFeature 2825 && Result != Match_PreferE32) || 2826 (R == Match_MnemonicFail && Result != Match_InvalidOperand 2827 && Result != Match_MissingFeature 2828 && Result != Match_PreferE32)) { 2829 Result = R; 2830 ErrorInfo = EI; 2831 } 2832 if (R == Match_Success) 2833 break; 2834 } 2835 2836 switch (Result) { 2837 default: break; 2838 case Match_Success: 2839 if (!validateInstruction(Inst, IDLoc)) { 2840 return true; 2841 } 2842 Inst.setLoc(IDLoc); 2843 Out.EmitInstruction(Inst, getSTI()); 2844 return false; 2845 2846 case Match_MissingFeature: 2847 return Error(IDLoc, "instruction not supported on this GPU"); 2848 2849 case Match_MnemonicFail: { 2850 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 2851 std::string Suggestion = AMDGPUMnemonicSpellCheck( 2852 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 2853 return Error(IDLoc, "invalid instruction" + Suggestion, 2854 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 2855 } 2856 2857 case Match_InvalidOperand: { 2858 SMLoc ErrorLoc = IDLoc; 2859 if (ErrorInfo != ~0ULL) { 2860 if (ErrorInfo >= Operands.size()) { 2861 return Error(IDLoc, "too few operands for instruction"); 2862 } 2863 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 2864 if (ErrorLoc == SMLoc()) 2865 ErrorLoc = IDLoc; 2866 } 2867 return Error(ErrorLoc, "invalid operand for instruction"); 2868 } 2869 2870 case Match_PreferE32: 2871 return Error(IDLoc, "internal error: instruction without _e64 suffix " 2872 "should be encoded as e32"); 2873 } 2874 llvm_unreachable("Implement any new match types added!"); 2875 } 2876 2877 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 2878 int64_t Tmp = -1; 2879 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 2880 return true; 2881 } 2882 if (getParser().parseAbsoluteExpression(Tmp)) { 2883 return true; 2884 } 2885 Ret = static_cast<uint32_t>(Tmp); 2886 return false; 2887 } 2888 2889 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 2890 uint32_t &Minor) { 2891 if (ParseAsAbsoluteExpression(Major)) 2892 return TokError("invalid major version"); 2893 2894 if (getLexer().isNot(AsmToken::Comma)) 2895 return TokError("minor version number required, comma expected"); 2896 Lex(); 2897 2898 if (ParseAsAbsoluteExpression(Minor)) 2899 return TokError("invalid minor version"); 2900 2901 return false; 2902 } 2903 2904 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 2905 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2906 return TokError("directive only supported for amdgcn architecture"); 2907 2908 std::string Target; 2909 2910 SMLoc TargetStart = getTok().getLoc(); 2911 if (getParser().parseEscapedString(Target)) 2912 return true; 2913 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 2914 2915 std::string ExpectedTarget; 2916 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 2917 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 2918 2919 if (Target != ExpectedTargetOS.str()) 2920 return getParser().Error(TargetRange.Start, "target must match options", 2921 TargetRange); 2922 2923 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 2924 return false; 2925 } 2926 2927 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 2928 return getParser().Error(Range.Start, "value out of range", Range); 2929 } 2930 2931 bool AMDGPUAsmParser::calculateGPRBlocks( 2932 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 2933 bool XNACKUsed, unsigned NextFreeVGPR, SMRange VGPRRange, 2934 unsigned NextFreeSGPR, SMRange SGPRRange, unsigned &VGPRBlocks, 2935 unsigned &SGPRBlocks) { 2936 // TODO(scott.linder): These calculations are duplicated from 2937 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 2938 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 2939 2940 unsigned NumVGPRs = NextFreeVGPR; 2941 unsigned NumSGPRs = NextFreeSGPR; 2942 unsigned MaxAddressableNumSGPRs = IsaInfo::getAddressableNumSGPRs(&getSTI()); 2943 2944 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 2945 NumSGPRs > MaxAddressableNumSGPRs) 2946 return OutOfRangeError(SGPRRange); 2947 2948 NumSGPRs += 2949 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 2950 2951 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 2952 NumSGPRs > MaxAddressableNumSGPRs) 2953 return OutOfRangeError(SGPRRange); 2954 2955 if (Features.test(FeatureSGPRInitBug)) 2956 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 2957 2958 VGPRBlocks = IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs); 2959 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 2960 2961 return false; 2962 } 2963 2964 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 2965 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 2966 return TokError("directive only supported for amdgcn architecture"); 2967 2968 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 2969 return TokError("directive only supported for amdhsa OS"); 2970 2971 StringRef KernelName; 2972 if (getParser().parseIdentifier(KernelName)) 2973 return true; 2974 2975 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(); 2976 2977 StringSet<> Seen; 2978 2979 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 2980 2981 SMRange VGPRRange; 2982 uint64_t NextFreeVGPR = 0; 2983 SMRange SGPRRange; 2984 uint64_t NextFreeSGPR = 0; 2985 unsigned UserSGPRCount = 0; 2986 bool ReserveVCC = true; 2987 bool ReserveFlatScr = true; 2988 bool ReserveXNACK = hasXNACK(); 2989 2990 while (true) { 2991 while (getLexer().is(AsmToken::EndOfStatement)) 2992 Lex(); 2993 2994 if (getLexer().isNot(AsmToken::Identifier)) 2995 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 2996 2997 StringRef ID = getTok().getIdentifier(); 2998 SMRange IDRange = getTok().getLocRange(); 2999 Lex(); 3000 3001 if (ID == ".end_amdhsa_kernel") 3002 break; 3003 3004 if (Seen.find(ID) != Seen.end()) 3005 return TokError(".amdhsa_ directives cannot be repeated"); 3006 Seen.insert(ID); 3007 3008 SMLoc ValStart = getTok().getLoc(); 3009 int64_t IVal; 3010 if (getParser().parseAbsoluteExpression(IVal)) 3011 return true; 3012 SMLoc ValEnd = getTok().getLoc(); 3013 SMRange ValRange = SMRange(ValStart, ValEnd); 3014 3015 if (IVal < 0) 3016 return OutOfRangeError(ValRange); 3017 3018 uint64_t Val = IVal; 3019 3020 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3021 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3022 return OutOfRangeError(RANGE); \ 3023 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3024 3025 if (ID == ".amdhsa_group_segment_fixed_size") { 3026 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3027 return OutOfRangeError(ValRange); 3028 KD.group_segment_fixed_size = Val; 3029 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3030 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3031 return OutOfRangeError(ValRange); 3032 KD.private_segment_fixed_size = Val; 3033 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3034 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3035 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3036 Val, ValRange); 3037 UserSGPRCount += 4; 3038 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3039 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3040 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3041 ValRange); 3042 UserSGPRCount += 2; 3043 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3044 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3045 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3046 ValRange); 3047 UserSGPRCount += 2; 3048 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3049 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3050 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3051 Val, ValRange); 3052 UserSGPRCount += 2; 3053 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3054 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3055 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3056 ValRange); 3057 UserSGPRCount += 2; 3058 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3059 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3060 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3061 ValRange); 3062 UserSGPRCount += 2; 3063 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3064 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3065 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3066 Val, ValRange); 3067 UserSGPRCount += 1; 3068 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3069 PARSE_BITS_ENTRY( 3070 KD.compute_pgm_rsrc2, 3071 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3072 ValRange); 3073 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3074 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3075 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3076 ValRange); 3077 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3078 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3079 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3080 ValRange); 3081 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3082 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3083 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3084 ValRange); 3085 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3086 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3087 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3088 ValRange); 3089 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3091 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3092 ValRange); 3093 } else if (ID == ".amdhsa_next_free_vgpr") { 3094 VGPRRange = ValRange; 3095 NextFreeVGPR = Val; 3096 } else if (ID == ".amdhsa_next_free_sgpr") { 3097 SGPRRange = ValRange; 3098 NextFreeSGPR = Val; 3099 } else if (ID == ".amdhsa_reserve_vcc") { 3100 if (!isUInt<1>(Val)) 3101 return OutOfRangeError(ValRange); 3102 ReserveVCC = Val; 3103 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3104 if (IVersion.Major < 7) 3105 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3106 IDRange); 3107 if (!isUInt<1>(Val)) 3108 return OutOfRangeError(ValRange); 3109 ReserveFlatScr = Val; 3110 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3111 if (IVersion.Major < 8) 3112 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3113 IDRange); 3114 if (!isUInt<1>(Val)) 3115 return OutOfRangeError(ValRange); 3116 ReserveXNACK = Val; 3117 } else if (ID == ".amdhsa_float_round_mode_32") { 3118 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3119 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3120 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3121 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3122 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3123 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3124 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3125 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3126 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3127 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3128 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3129 ValRange); 3130 } else if (ID == ".amdhsa_dx10_clamp") { 3131 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3132 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3133 } else if (ID == ".amdhsa_ieee_mode") { 3134 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3135 Val, ValRange); 3136 } else if (ID == ".amdhsa_fp16_overflow") { 3137 if (IVersion.Major < 9) 3138 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3139 IDRange); 3140 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3141 ValRange); 3142 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3143 PARSE_BITS_ENTRY( 3144 KD.compute_pgm_rsrc2, 3145 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3146 ValRange); 3147 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3148 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3149 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3150 Val, ValRange); 3151 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3152 PARSE_BITS_ENTRY( 3153 KD.compute_pgm_rsrc2, 3154 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3155 ValRange); 3156 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3157 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3158 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3159 Val, ValRange); 3160 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 3161 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3162 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3163 Val, ValRange); 3164 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3165 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3166 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3167 Val, ValRange); 3168 } else if (ID == ".amdhsa_exception_int_div_zero") { 3169 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3170 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3171 Val, ValRange); 3172 } else { 3173 return getParser().Error(IDRange.Start, 3174 "unknown .amdhsa_kernel directive", IDRange); 3175 } 3176 3177 #undef PARSE_BITS_ENTRY 3178 } 3179 3180 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3181 return TokError(".amdhsa_next_free_vgpr directive is required"); 3182 3183 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3184 return TokError(".amdhsa_next_free_sgpr directive is required"); 3185 3186 unsigned VGPRBlocks; 3187 unsigned SGPRBlocks; 3188 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3189 ReserveXNACK, NextFreeVGPR, VGPRRange, NextFreeSGPR, 3190 SGPRRange, VGPRBlocks, SGPRBlocks)) 3191 return true; 3192 3193 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3194 VGPRBlocks)) 3195 return OutOfRangeError(VGPRRange); 3196 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3197 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3198 3199 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3200 SGPRBlocks)) 3201 return OutOfRangeError(SGPRRange); 3202 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3203 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3204 SGPRBlocks); 3205 3206 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3207 return TokError("too many user SGPRs enabled"); 3208 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3209 UserSGPRCount); 3210 3211 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3212 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3213 ReserveFlatScr, ReserveXNACK); 3214 return false; 3215 } 3216 3217 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3218 uint32_t Major; 3219 uint32_t Minor; 3220 3221 if (ParseDirectiveMajorMinor(Major, Minor)) 3222 return true; 3223 3224 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3225 return false; 3226 } 3227 3228 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3229 uint32_t Major; 3230 uint32_t Minor; 3231 uint32_t Stepping; 3232 StringRef VendorName; 3233 StringRef ArchName; 3234 3235 // If this directive has no arguments, then use the ISA version for the 3236 // targeted GPU. 3237 if (getLexer().is(AsmToken::EndOfStatement)) { 3238 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3239 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3240 ISA.Stepping, 3241 "AMD", "AMDGPU"); 3242 return false; 3243 } 3244 3245 if (ParseDirectiveMajorMinor(Major, Minor)) 3246 return true; 3247 3248 if (getLexer().isNot(AsmToken::Comma)) 3249 return TokError("stepping version number required, comma expected"); 3250 Lex(); 3251 3252 if (ParseAsAbsoluteExpression(Stepping)) 3253 return TokError("invalid stepping version"); 3254 3255 if (getLexer().isNot(AsmToken::Comma)) 3256 return TokError("vendor name required, comma expected"); 3257 Lex(); 3258 3259 if (getLexer().isNot(AsmToken::String)) 3260 return TokError("invalid vendor name"); 3261 3262 VendorName = getLexer().getTok().getStringContents(); 3263 Lex(); 3264 3265 if (getLexer().isNot(AsmToken::Comma)) 3266 return TokError("arch name required, comma expected"); 3267 Lex(); 3268 3269 if (getLexer().isNot(AsmToken::String)) 3270 return TokError("invalid arch name"); 3271 3272 ArchName = getLexer().getTok().getStringContents(); 3273 Lex(); 3274 3275 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3276 VendorName, ArchName); 3277 return false; 3278 } 3279 3280 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3281 amd_kernel_code_t &Header) { 3282 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3283 // assembly for backwards compatibility. 3284 if (ID == "max_scratch_backing_memory_byte_size") { 3285 Parser.eatToEndOfStatement(); 3286 return false; 3287 } 3288 3289 SmallString<40> ErrStr; 3290 raw_svector_ostream Err(ErrStr); 3291 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3292 return TokError(Err.str()); 3293 } 3294 Lex(); 3295 return false; 3296 } 3297 3298 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3299 amd_kernel_code_t Header; 3300 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3301 3302 while (true) { 3303 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3304 // will set the current token to EndOfStatement. 3305 while(getLexer().is(AsmToken::EndOfStatement)) 3306 Lex(); 3307 3308 if (getLexer().isNot(AsmToken::Identifier)) 3309 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3310 3311 StringRef ID = getLexer().getTok().getIdentifier(); 3312 Lex(); 3313 3314 if (ID == ".end_amd_kernel_code_t") 3315 break; 3316 3317 if (ParseAMDKernelCodeTValue(ID, Header)) 3318 return true; 3319 } 3320 3321 getTargetStreamer().EmitAMDKernelCodeT(Header); 3322 3323 return false; 3324 } 3325 3326 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 3327 if (getLexer().isNot(AsmToken::Identifier)) 3328 return TokError("expected symbol name"); 3329 3330 StringRef KernelName = Parser.getTok().getString(); 3331 3332 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 3333 ELF::STT_AMDGPU_HSA_KERNEL); 3334 Lex(); 3335 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 3336 KernelScope.initialize(getContext()); 3337 return false; 3338 } 3339 3340 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 3341 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 3342 return Error(getParser().getTok().getLoc(), 3343 ".amd_amdgpu_isa directive is not available on non-amdgcn " 3344 "architectures"); 3345 } 3346 3347 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 3348 3349 std::string ISAVersionStringFromSTI; 3350 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 3351 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 3352 3353 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 3354 return Error(getParser().getTok().getLoc(), 3355 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 3356 "arguments specified through the command line"); 3357 } 3358 3359 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 3360 Lex(); 3361 3362 return false; 3363 } 3364 3365 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 3366 const char *AssemblerDirectiveBegin; 3367 const char *AssemblerDirectiveEnd; 3368 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 3369 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 3370 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 3371 HSAMD::V3::AssemblerDirectiveEnd) 3372 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 3373 HSAMD::AssemblerDirectiveEnd); 3374 3375 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 3376 return Error(getParser().getTok().getLoc(), 3377 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 3378 "not available on non-amdhsa OSes")).str()); 3379 } 3380 3381 std::string HSAMetadataString; 3382 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 3383 HSAMetadataString)) 3384 return true; 3385 3386 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 3387 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 3388 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3389 } else { 3390 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 3391 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 3392 } 3393 3394 return false; 3395 } 3396 3397 /// Common code to parse out a block of text (typically YAML) between start and 3398 /// end directives. 3399 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 3400 const char *AssemblerDirectiveEnd, 3401 std::string &CollectString) { 3402 3403 raw_string_ostream CollectStream(CollectString); 3404 3405 getLexer().setSkipSpace(false); 3406 3407 bool FoundEnd = false; 3408 while (!getLexer().is(AsmToken::Eof)) { 3409 while (getLexer().is(AsmToken::Space)) { 3410 CollectStream << getLexer().getTok().getString(); 3411 Lex(); 3412 } 3413 3414 if (getLexer().is(AsmToken::Identifier)) { 3415 StringRef ID = getLexer().getTok().getIdentifier(); 3416 if (ID == AssemblerDirectiveEnd) { 3417 Lex(); 3418 FoundEnd = true; 3419 break; 3420 } 3421 } 3422 3423 CollectStream << Parser.parseStringToEndOfStatement() 3424 << getContext().getAsmInfo()->getSeparatorString(); 3425 3426 Parser.eatToEndOfStatement(); 3427 } 3428 3429 getLexer().setSkipSpace(true); 3430 3431 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 3432 return TokError(Twine("expected directive ") + 3433 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 3434 } 3435 3436 CollectStream.flush(); 3437 return false; 3438 } 3439 3440 /// Parse the assembler directive for new MsgPack-format PAL metadata. 3441 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 3442 std::string String; 3443 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 3444 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 3445 return true; 3446 3447 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3448 if (!PALMetadata->setFromString(String)) 3449 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 3450 return false; 3451 } 3452 3453 /// Parse the assembler directive for old linear-format PAL metadata. 3454 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 3455 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 3456 return Error(getParser().getTok().getLoc(), 3457 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 3458 "not available on non-amdpal OSes")).str()); 3459 } 3460 3461 auto PALMetadata = getTargetStreamer().getPALMetadata(); 3462 PALMetadata->setLegacy(); 3463 for (;;) { 3464 uint32_t Key, Value; 3465 if (ParseAsAbsoluteExpression(Key)) { 3466 return TokError(Twine("invalid value in ") + 3467 Twine(PALMD::AssemblerDirective)); 3468 } 3469 if (getLexer().isNot(AsmToken::Comma)) { 3470 return TokError(Twine("expected an even number of values in ") + 3471 Twine(PALMD::AssemblerDirective)); 3472 } 3473 Lex(); 3474 if (ParseAsAbsoluteExpression(Value)) { 3475 return TokError(Twine("invalid value in ") + 3476 Twine(PALMD::AssemblerDirective)); 3477 } 3478 PALMetadata->setRegister(Key, Value); 3479 if (getLexer().isNot(AsmToken::Comma)) 3480 break; 3481 Lex(); 3482 } 3483 return false; 3484 } 3485 3486 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 3487 StringRef IDVal = DirectiveID.getString(); 3488 3489 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 3490 if (IDVal == ".amdgcn_target") 3491 return ParseDirectiveAMDGCNTarget(); 3492 3493 if (IDVal == ".amdhsa_kernel") 3494 return ParseDirectiveAMDHSAKernel(); 3495 3496 // TODO: Restructure/combine with PAL metadata directive. 3497 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 3498 return ParseDirectiveHSAMetadata(); 3499 } else { 3500 if (IDVal == ".hsa_code_object_version") 3501 return ParseDirectiveHSACodeObjectVersion(); 3502 3503 if (IDVal == ".hsa_code_object_isa") 3504 return ParseDirectiveHSACodeObjectISA(); 3505 3506 if (IDVal == ".amd_kernel_code_t") 3507 return ParseDirectiveAMDKernelCodeT(); 3508 3509 if (IDVal == ".amdgpu_hsa_kernel") 3510 return ParseDirectiveAMDGPUHsaKernel(); 3511 3512 if (IDVal == ".amd_amdgpu_isa") 3513 return ParseDirectiveISAVersion(); 3514 3515 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 3516 return ParseDirectiveHSAMetadata(); 3517 } 3518 3519 if (IDVal == PALMD::AssemblerDirectiveBegin) 3520 return ParseDirectivePALMetadataBegin(); 3521 3522 if (IDVal == PALMD::AssemblerDirective) 3523 return ParseDirectivePALMetadata(); 3524 3525 return true; 3526 } 3527 3528 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 3529 unsigned RegNo) const { 3530 3531 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 3532 R.isValid(); ++R) { 3533 if (*R == RegNo) 3534 return isGFX9(); 3535 } 3536 3537 switch (RegNo) { 3538 case AMDGPU::TBA: 3539 case AMDGPU::TBA_LO: 3540 case AMDGPU::TBA_HI: 3541 case AMDGPU::TMA: 3542 case AMDGPU::TMA_LO: 3543 case AMDGPU::TMA_HI: 3544 return !isGFX9(); 3545 case AMDGPU::XNACK_MASK: 3546 case AMDGPU::XNACK_MASK_LO: 3547 case AMDGPU::XNACK_MASK_HI: 3548 return !isCI() && !isSI() && hasXNACK(); 3549 default: 3550 break; 3551 } 3552 3553 if (isInlineValue(RegNo)) 3554 return !isCI() && !isSI() && !isVI(); 3555 3556 if (isCI()) 3557 return true; 3558 3559 if (isSI()) { 3560 // No flat_scr 3561 switch (RegNo) { 3562 case AMDGPU::FLAT_SCR: 3563 case AMDGPU::FLAT_SCR_LO: 3564 case AMDGPU::FLAT_SCR_HI: 3565 return false; 3566 default: 3567 return true; 3568 } 3569 } 3570 3571 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 3572 // SI/CI have. 3573 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 3574 R.isValid(); ++R) { 3575 if (*R == RegNo) 3576 return false; 3577 } 3578 3579 return true; 3580 } 3581 3582 OperandMatchResultTy 3583 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { 3584 // Try to parse with a custom parser 3585 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 3586 3587 // If we successfully parsed the operand or if there as an error parsing, 3588 // we are done. 3589 // 3590 // If we are parsing after we reach EndOfStatement then this means we 3591 // are appending default values to the Operands list. This is only done 3592 // by custom parser, so we shouldn't continue on to the generic parsing. 3593 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 3594 getLexer().is(AsmToken::EndOfStatement)) 3595 return ResTy; 3596 3597 ResTy = parseRegOrImm(Operands); 3598 3599 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail) 3600 return ResTy; 3601 3602 const auto &Tok = Parser.getTok(); 3603 SMLoc S = Tok.getLoc(); 3604 3605 const MCExpr *Expr = nullptr; 3606 if (!Parser.parseExpression(Expr)) { 3607 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 3608 return MatchOperand_Success; 3609 } 3610 3611 // Possibly this is an instruction flag like 'gds'. 3612 if (Tok.getKind() == AsmToken::Identifier) { 3613 Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), S)); 3614 Parser.Lex(); 3615 return MatchOperand_Success; 3616 } 3617 3618 return MatchOperand_NoMatch; 3619 } 3620 3621 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 3622 // Clear any forced encodings from the previous instruction. 3623 setForcedEncodingSize(0); 3624 setForcedDPP(false); 3625 setForcedSDWA(false); 3626 3627 if (Name.endswith("_e64")) { 3628 setForcedEncodingSize(64); 3629 return Name.substr(0, Name.size() - 4); 3630 } else if (Name.endswith("_e32")) { 3631 setForcedEncodingSize(32); 3632 return Name.substr(0, Name.size() - 4); 3633 } else if (Name.endswith("_dpp")) { 3634 setForcedDPP(true); 3635 return Name.substr(0, Name.size() - 4); 3636 } else if (Name.endswith("_sdwa")) { 3637 setForcedSDWA(true); 3638 return Name.substr(0, Name.size() - 5); 3639 } 3640 return Name; 3641 } 3642 3643 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 3644 StringRef Name, 3645 SMLoc NameLoc, OperandVector &Operands) { 3646 // Add the instruction mnemonic 3647 Name = parseMnemonicSuffix(Name); 3648 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 3649 3650 while (!getLexer().is(AsmToken::EndOfStatement)) { 3651 OperandMatchResultTy Res = parseOperand(Operands, Name); 3652 3653 // Eat the comma or space if there is one. 3654 if (getLexer().is(AsmToken::Comma)) 3655 Parser.Lex(); 3656 3657 switch (Res) { 3658 case MatchOperand_Success: break; 3659 case MatchOperand_ParseFail: 3660 Error(getLexer().getLoc(), "failed parsing operand."); 3661 while (!getLexer().is(AsmToken::EndOfStatement)) { 3662 Parser.Lex(); 3663 } 3664 return true; 3665 case MatchOperand_NoMatch: 3666 Error(getLexer().getLoc(), "not a valid operand."); 3667 while (!getLexer().is(AsmToken::EndOfStatement)) { 3668 Parser.Lex(); 3669 } 3670 return true; 3671 } 3672 } 3673 3674 return false; 3675 } 3676 3677 //===----------------------------------------------------------------------===// 3678 // Utility functions 3679 //===----------------------------------------------------------------------===// 3680 3681 OperandMatchResultTy 3682 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) { 3683 switch(getLexer().getKind()) { 3684 default: return MatchOperand_NoMatch; 3685 case AsmToken::Identifier: { 3686 StringRef Name = Parser.getTok().getString(); 3687 if (!Name.equals(Prefix)) { 3688 return MatchOperand_NoMatch; 3689 } 3690 3691 Parser.Lex(); 3692 if (getLexer().isNot(AsmToken::Colon)) 3693 return MatchOperand_ParseFail; 3694 3695 Parser.Lex(); 3696 3697 bool IsMinus = false; 3698 if (getLexer().getKind() == AsmToken::Minus) { 3699 Parser.Lex(); 3700 IsMinus = true; 3701 } 3702 3703 if (getLexer().isNot(AsmToken::Integer)) 3704 return MatchOperand_ParseFail; 3705 3706 if (getParser().parseAbsoluteExpression(Int)) 3707 return MatchOperand_ParseFail; 3708 3709 if (IsMinus) 3710 Int = -Int; 3711 break; 3712 } 3713 } 3714 return MatchOperand_Success; 3715 } 3716 3717 OperandMatchResultTy 3718 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 3719 AMDGPUOperand::ImmTy ImmTy, 3720 bool (*ConvertResult)(int64_t&)) { 3721 SMLoc S = Parser.getTok().getLoc(); 3722 int64_t Value = 0; 3723 3724 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 3725 if (Res != MatchOperand_Success) 3726 return Res; 3727 3728 if (ConvertResult && !ConvertResult(Value)) { 3729 return MatchOperand_ParseFail; 3730 } 3731 3732 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 3733 return MatchOperand_Success; 3734 } 3735 3736 OperandMatchResultTy AMDGPUAsmParser::parseOperandArrayWithPrefix( 3737 const char *Prefix, 3738 OperandVector &Operands, 3739 AMDGPUOperand::ImmTy ImmTy, 3740 bool (*ConvertResult)(int64_t&)) { 3741 StringRef Name = Parser.getTok().getString(); 3742 if (!Name.equals(Prefix)) 3743 return MatchOperand_NoMatch; 3744 3745 Parser.Lex(); 3746 if (getLexer().isNot(AsmToken::Colon)) 3747 return MatchOperand_ParseFail; 3748 3749 Parser.Lex(); 3750 if (getLexer().isNot(AsmToken::LBrac)) 3751 return MatchOperand_ParseFail; 3752 Parser.Lex(); 3753 3754 unsigned Val = 0; 3755 SMLoc S = Parser.getTok().getLoc(); 3756 3757 // FIXME: How to verify the number of elements matches the number of src 3758 // operands? 3759 for (int I = 0; I < 4; ++I) { 3760 if (I != 0) { 3761 if (getLexer().is(AsmToken::RBrac)) 3762 break; 3763 3764 if (getLexer().isNot(AsmToken::Comma)) 3765 return MatchOperand_ParseFail; 3766 Parser.Lex(); 3767 } 3768 3769 if (getLexer().isNot(AsmToken::Integer)) 3770 return MatchOperand_ParseFail; 3771 3772 int64_t Op; 3773 if (getParser().parseAbsoluteExpression(Op)) 3774 return MatchOperand_ParseFail; 3775 3776 if (Op != 0 && Op != 1) 3777 return MatchOperand_ParseFail; 3778 Val |= (Op << I); 3779 } 3780 3781 Parser.Lex(); 3782 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 3783 return MatchOperand_Success; 3784 } 3785 3786 OperandMatchResultTy 3787 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 3788 AMDGPUOperand::ImmTy ImmTy) { 3789 int64_t Bit = 0; 3790 SMLoc S = Parser.getTok().getLoc(); 3791 3792 // We are at the end of the statement, and this is a default argument, so 3793 // use a default value. 3794 if (getLexer().isNot(AsmToken::EndOfStatement)) { 3795 switch(getLexer().getKind()) { 3796 case AsmToken::Identifier: { 3797 StringRef Tok = Parser.getTok().getString(); 3798 if (Tok == Name) { 3799 if (Tok == "r128" && isGFX9()) 3800 Error(S, "r128 modifier is not supported on this GPU"); 3801 if (Tok == "a16" && !isGFX9()) 3802 Error(S, "a16 modifier is not supported on this GPU"); 3803 Bit = 1; 3804 Parser.Lex(); 3805 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 3806 Bit = 0; 3807 Parser.Lex(); 3808 } else { 3809 return MatchOperand_NoMatch; 3810 } 3811 break; 3812 } 3813 default: 3814 return MatchOperand_NoMatch; 3815 } 3816 } 3817 3818 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 3819 return MatchOperand_Success; 3820 } 3821 3822 static void addOptionalImmOperand( 3823 MCInst& Inst, const OperandVector& Operands, 3824 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 3825 AMDGPUOperand::ImmTy ImmT, 3826 int64_t Default = 0) { 3827 auto i = OptionalIdx.find(ImmT); 3828 if (i != OptionalIdx.end()) { 3829 unsigned Idx = i->second; 3830 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 3831 } else { 3832 Inst.addOperand(MCOperand::createImm(Default)); 3833 } 3834 } 3835 3836 OperandMatchResultTy 3837 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 3838 if (getLexer().isNot(AsmToken::Identifier)) { 3839 return MatchOperand_NoMatch; 3840 } 3841 StringRef Tok = Parser.getTok().getString(); 3842 if (Tok != Prefix) { 3843 return MatchOperand_NoMatch; 3844 } 3845 3846 Parser.Lex(); 3847 if (getLexer().isNot(AsmToken::Colon)) { 3848 return MatchOperand_ParseFail; 3849 } 3850 3851 Parser.Lex(); 3852 if (getLexer().isNot(AsmToken::Identifier)) { 3853 return MatchOperand_ParseFail; 3854 } 3855 3856 Value = Parser.getTok().getString(); 3857 return MatchOperand_Success; 3858 } 3859 3860 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 3861 // values to live in a joint format operand in the MCInst encoding. 3862 OperandMatchResultTy 3863 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 3864 SMLoc S = Parser.getTok().getLoc(); 3865 int64_t Dfmt = 0, Nfmt = 0; 3866 // dfmt and nfmt can appear in either order, and each is optional. 3867 bool GotDfmt = false, GotNfmt = false; 3868 while (!GotDfmt || !GotNfmt) { 3869 if (!GotDfmt) { 3870 auto Res = parseIntWithPrefix("dfmt", Dfmt); 3871 if (Res != MatchOperand_NoMatch) { 3872 if (Res != MatchOperand_Success) 3873 return Res; 3874 if (Dfmt >= 16) { 3875 Error(Parser.getTok().getLoc(), "out of range dfmt"); 3876 return MatchOperand_ParseFail; 3877 } 3878 GotDfmt = true; 3879 Parser.Lex(); 3880 continue; 3881 } 3882 } 3883 if (!GotNfmt) { 3884 auto Res = parseIntWithPrefix("nfmt", Nfmt); 3885 if (Res != MatchOperand_NoMatch) { 3886 if (Res != MatchOperand_Success) 3887 return Res; 3888 if (Nfmt >= 8) { 3889 Error(Parser.getTok().getLoc(), "out of range nfmt"); 3890 return MatchOperand_ParseFail; 3891 } 3892 GotNfmt = true; 3893 Parser.Lex(); 3894 continue; 3895 } 3896 } 3897 break; 3898 } 3899 if (!GotDfmt && !GotNfmt) 3900 return MatchOperand_NoMatch; 3901 auto Format = Dfmt | Nfmt << 4; 3902 Operands.push_back( 3903 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 3904 return MatchOperand_Success; 3905 } 3906 3907 //===----------------------------------------------------------------------===// 3908 // ds 3909 //===----------------------------------------------------------------------===// 3910 3911 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 3912 const OperandVector &Operands) { 3913 OptionalImmIndexMap OptionalIdx; 3914 3915 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3916 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3917 3918 // Add the register arguments 3919 if (Op.isReg()) { 3920 Op.addRegOperands(Inst, 1); 3921 continue; 3922 } 3923 3924 // Handle optional arguments 3925 OptionalIdx[Op.getImmTy()] = i; 3926 } 3927 3928 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 3929 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 3930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3931 3932 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3933 } 3934 3935 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 3936 bool IsGdsHardcoded) { 3937 OptionalImmIndexMap OptionalIdx; 3938 3939 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3940 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3941 3942 // Add the register arguments 3943 if (Op.isReg()) { 3944 Op.addRegOperands(Inst, 1); 3945 continue; 3946 } 3947 3948 if (Op.isToken() && Op.getToken() == "gds") { 3949 IsGdsHardcoded = true; 3950 continue; 3951 } 3952 3953 // Handle optional arguments 3954 OptionalIdx[Op.getImmTy()] = i; 3955 } 3956 3957 AMDGPUOperand::ImmTy OffsetType = 3958 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_si || 3959 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : 3960 AMDGPUOperand::ImmTyOffset; 3961 3962 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 3963 3964 if (!IsGdsHardcoded) { 3965 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 3966 } 3967 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 3968 } 3969 3970 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 3971 OptionalImmIndexMap OptionalIdx; 3972 3973 unsigned OperandIdx[4]; 3974 unsigned EnMask = 0; 3975 int SrcIdx = 0; 3976 3977 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3978 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3979 3980 // Add the register arguments 3981 if (Op.isReg()) { 3982 assert(SrcIdx < 4); 3983 OperandIdx[SrcIdx] = Inst.size(); 3984 Op.addRegOperands(Inst, 1); 3985 ++SrcIdx; 3986 continue; 3987 } 3988 3989 if (Op.isOff()) { 3990 assert(SrcIdx < 4); 3991 OperandIdx[SrcIdx] = Inst.size(); 3992 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 3993 ++SrcIdx; 3994 continue; 3995 } 3996 3997 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 3998 Op.addImmOperands(Inst, 1); 3999 continue; 4000 } 4001 4002 if (Op.isToken() && Op.getToken() == "done") 4003 continue; 4004 4005 // Handle optional arguments 4006 OptionalIdx[Op.getImmTy()] = i; 4007 } 4008 4009 assert(SrcIdx == 4); 4010 4011 bool Compr = false; 4012 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4013 Compr = true; 4014 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4015 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4016 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4017 } 4018 4019 for (auto i = 0; i < SrcIdx; ++i) { 4020 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4021 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 4022 } 4023 } 4024 4025 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4026 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4027 4028 Inst.addOperand(MCOperand::createImm(EnMask)); 4029 } 4030 4031 //===----------------------------------------------------------------------===// 4032 // s_waitcnt 4033 //===----------------------------------------------------------------------===// 4034 4035 static bool 4036 encodeCnt( 4037 const AMDGPU::IsaVersion ISA, 4038 int64_t &IntVal, 4039 int64_t CntVal, 4040 bool Saturate, 4041 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4042 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4043 { 4044 bool Failed = false; 4045 4046 IntVal = encode(ISA, IntVal, CntVal); 4047 if (CntVal != decode(ISA, IntVal)) { 4048 if (Saturate) { 4049 IntVal = encode(ISA, IntVal, -1); 4050 } else { 4051 Failed = true; 4052 } 4053 } 4054 return Failed; 4055 } 4056 4057 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4058 StringRef CntName = Parser.getTok().getString(); 4059 int64_t CntVal; 4060 4061 Parser.Lex(); 4062 if (getLexer().isNot(AsmToken::LParen)) 4063 return true; 4064 4065 Parser.Lex(); 4066 if (getLexer().isNot(AsmToken::Integer)) 4067 return true; 4068 4069 SMLoc ValLoc = Parser.getTok().getLoc(); 4070 if (getParser().parseAbsoluteExpression(CntVal)) 4071 return true; 4072 4073 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4074 4075 bool Failed = true; 4076 bool Sat = CntName.endswith("_sat"); 4077 4078 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4079 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4080 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4081 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4082 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4083 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4084 } 4085 4086 if (Failed) { 4087 Error(ValLoc, "too large value for " + CntName); 4088 return true; 4089 } 4090 4091 if (getLexer().isNot(AsmToken::RParen)) { 4092 return true; 4093 } 4094 4095 Parser.Lex(); 4096 if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma)) { 4097 const AsmToken NextToken = getLexer().peekTok(); 4098 if (NextToken.is(AsmToken::Identifier)) { 4099 Parser.Lex(); 4100 } 4101 } 4102 4103 return false; 4104 } 4105 4106 OperandMatchResultTy 4107 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4108 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4109 int64_t Waitcnt = getWaitcntBitMask(ISA); 4110 SMLoc S = Parser.getTok().getLoc(); 4111 4112 switch(getLexer().getKind()) { 4113 default: return MatchOperand_ParseFail; 4114 case AsmToken::Integer: 4115 // The operand can be an integer value. 4116 if (getParser().parseAbsoluteExpression(Waitcnt)) 4117 return MatchOperand_ParseFail; 4118 break; 4119 4120 case AsmToken::Identifier: 4121 do { 4122 if (parseCnt(Waitcnt)) 4123 return MatchOperand_ParseFail; 4124 } while(getLexer().isNot(AsmToken::EndOfStatement)); 4125 break; 4126 } 4127 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4128 return MatchOperand_Success; 4129 } 4130 4131 bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, 4132 int64_t &Width) { 4133 using namespace llvm::AMDGPU::Hwreg; 4134 4135 if (Parser.getTok().getString() != "hwreg") 4136 return true; 4137 Parser.Lex(); 4138 4139 if (getLexer().isNot(AsmToken::LParen)) 4140 return true; 4141 Parser.Lex(); 4142 4143 if (getLexer().is(AsmToken::Identifier)) { 4144 HwReg.IsSymbolic = true; 4145 HwReg.Id = ID_UNKNOWN_; 4146 const StringRef tok = Parser.getTok().getString(); 4147 int Last = ID_SYMBOLIC_LAST_; 4148 if (isSI() || isCI() || isVI()) 4149 Last = ID_SYMBOLIC_FIRST_GFX9_; 4150 for (int i = ID_SYMBOLIC_FIRST_; i < Last; ++i) { 4151 if (tok == IdSymbolic[i]) { 4152 HwReg.Id = i; 4153 break; 4154 } 4155 } 4156 Parser.Lex(); 4157 } else { 4158 HwReg.IsSymbolic = false; 4159 if (getLexer().isNot(AsmToken::Integer)) 4160 return true; 4161 if (getParser().parseAbsoluteExpression(HwReg.Id)) 4162 return true; 4163 } 4164 4165 if (getLexer().is(AsmToken::RParen)) { 4166 Parser.Lex(); 4167 return false; 4168 } 4169 4170 // optional params 4171 if (getLexer().isNot(AsmToken::Comma)) 4172 return true; 4173 Parser.Lex(); 4174 4175 if (getLexer().isNot(AsmToken::Integer)) 4176 return true; 4177 if (getParser().parseAbsoluteExpression(Offset)) 4178 return true; 4179 4180 if (getLexer().isNot(AsmToken::Comma)) 4181 return true; 4182 Parser.Lex(); 4183 4184 if (getLexer().isNot(AsmToken::Integer)) 4185 return true; 4186 if (getParser().parseAbsoluteExpression(Width)) 4187 return true; 4188 4189 if (getLexer().isNot(AsmToken::RParen)) 4190 return true; 4191 Parser.Lex(); 4192 4193 return false; 4194 } 4195 4196 OperandMatchResultTy AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4197 using namespace llvm::AMDGPU::Hwreg; 4198 4199 int64_t Imm16Val = 0; 4200 SMLoc S = Parser.getTok().getLoc(); 4201 4202 switch(getLexer().getKind()) { 4203 default: return MatchOperand_NoMatch; 4204 case AsmToken::Integer: 4205 // The operand can be an integer value. 4206 if (getParser().parseAbsoluteExpression(Imm16Val)) 4207 return MatchOperand_NoMatch; 4208 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4209 Error(S, "invalid immediate: only 16-bit values are legal"); 4210 // Do not return error code, but create an imm operand anyway and proceed 4211 // to the next operand, if any. That avoids unneccessary error messages. 4212 } 4213 break; 4214 4215 case AsmToken::Identifier: { 4216 OperandInfoTy HwReg(ID_UNKNOWN_); 4217 int64_t Offset = OFFSET_DEFAULT_; 4218 int64_t Width = WIDTH_M1_DEFAULT_ + 1; 4219 if (parseHwregConstruct(HwReg, Offset, Width)) 4220 return MatchOperand_ParseFail; 4221 if (HwReg.Id < 0 || !isUInt<ID_WIDTH_>(HwReg.Id)) { 4222 if (HwReg.IsSymbolic) 4223 Error(S, "invalid symbolic name of hardware register"); 4224 else 4225 Error(S, "invalid code of hardware register: only 6-bit values are legal"); 4226 } 4227 if (Offset < 0 || !isUInt<OFFSET_WIDTH_>(Offset)) 4228 Error(S, "invalid bit offset: only 5-bit values are legal"); 4229 if ((Width-1) < 0 || !isUInt<WIDTH_M1_WIDTH_>(Width-1)) 4230 Error(S, "invalid bitfield width: only values from 1 to 32 are legal"); 4231 Imm16Val = (HwReg.Id << ID_SHIFT_) | (Offset << OFFSET_SHIFT_) | ((Width-1) << WIDTH_M1_SHIFT_); 4232 } 4233 break; 4234 } 4235 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg)); 4236 return MatchOperand_Success; 4237 } 4238 4239 bool AMDGPUOperand::isSWaitCnt() const { 4240 return isImm(); 4241 } 4242 4243 bool AMDGPUOperand::isHwreg() const { 4244 return isImmTy(ImmTyHwreg); 4245 } 4246 4247 bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId) { 4248 using namespace llvm::AMDGPU::SendMsg; 4249 4250 if (Parser.getTok().getString() != "sendmsg") 4251 return true; 4252 Parser.Lex(); 4253 4254 if (getLexer().isNot(AsmToken::LParen)) 4255 return true; 4256 Parser.Lex(); 4257 4258 if (getLexer().is(AsmToken::Identifier)) { 4259 Msg.IsSymbolic = true; 4260 Msg.Id = ID_UNKNOWN_; 4261 const std::string tok = Parser.getTok().getString(); 4262 for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) { 4263 switch(i) { 4264 default: continue; // Omit gaps. 4265 case ID_INTERRUPT: case ID_GS: case ID_GS_DONE: case ID_SYSMSG: break; 4266 } 4267 if (tok == IdSymbolic[i]) { 4268 Msg.Id = i; 4269 break; 4270 } 4271 } 4272 Parser.Lex(); 4273 } else { 4274 Msg.IsSymbolic = false; 4275 if (getLexer().isNot(AsmToken::Integer)) 4276 return true; 4277 if (getParser().parseAbsoluteExpression(Msg.Id)) 4278 return true; 4279 if (getLexer().is(AsmToken::Integer)) 4280 if (getParser().parseAbsoluteExpression(Msg.Id)) 4281 Msg.Id = ID_UNKNOWN_; 4282 } 4283 if (Msg.Id == ID_UNKNOWN_) // Don't know how to parse the rest. 4284 return false; 4285 4286 if (!(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG)) { 4287 if (getLexer().isNot(AsmToken::RParen)) 4288 return true; 4289 Parser.Lex(); 4290 return false; 4291 } 4292 4293 if (getLexer().isNot(AsmToken::Comma)) 4294 return true; 4295 Parser.Lex(); 4296 4297 assert(Msg.Id == ID_GS || Msg.Id == ID_GS_DONE || Msg.Id == ID_SYSMSG); 4298 Operation.Id = ID_UNKNOWN_; 4299 if (getLexer().is(AsmToken::Identifier)) { 4300 Operation.IsSymbolic = true; 4301 const char* const *S = (Msg.Id == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic; 4302 const int F = (Msg.Id == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_; 4303 const int L = (Msg.Id == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_; 4304 const StringRef Tok = Parser.getTok().getString(); 4305 for (int i = F; i < L; ++i) { 4306 if (Tok == S[i]) { 4307 Operation.Id = i; 4308 break; 4309 } 4310 } 4311 Parser.Lex(); 4312 } else { 4313 Operation.IsSymbolic = false; 4314 if (getLexer().isNot(AsmToken::Integer)) 4315 return true; 4316 if (getParser().parseAbsoluteExpression(Operation.Id)) 4317 return true; 4318 } 4319 4320 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4321 // Stream id is optional. 4322 if (getLexer().is(AsmToken::RParen)) { 4323 Parser.Lex(); 4324 return false; 4325 } 4326 4327 if (getLexer().isNot(AsmToken::Comma)) 4328 return true; 4329 Parser.Lex(); 4330 4331 if (getLexer().isNot(AsmToken::Integer)) 4332 return true; 4333 if (getParser().parseAbsoluteExpression(StreamId)) 4334 return true; 4335 } 4336 4337 if (getLexer().isNot(AsmToken::RParen)) 4338 return true; 4339 Parser.Lex(); 4340 return false; 4341 } 4342 4343 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 4344 if (getLexer().getKind() != AsmToken::Identifier) 4345 return MatchOperand_NoMatch; 4346 4347 StringRef Str = Parser.getTok().getString(); 4348 int Slot = StringSwitch<int>(Str) 4349 .Case("p10", 0) 4350 .Case("p20", 1) 4351 .Case("p0", 2) 4352 .Default(-1); 4353 4354 SMLoc S = Parser.getTok().getLoc(); 4355 if (Slot == -1) 4356 return MatchOperand_ParseFail; 4357 4358 Parser.Lex(); 4359 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 4360 AMDGPUOperand::ImmTyInterpSlot)); 4361 return MatchOperand_Success; 4362 } 4363 4364 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 4365 if (getLexer().getKind() != AsmToken::Identifier) 4366 return MatchOperand_NoMatch; 4367 4368 StringRef Str = Parser.getTok().getString(); 4369 if (!Str.startswith("attr")) 4370 return MatchOperand_NoMatch; 4371 4372 StringRef Chan = Str.take_back(2); 4373 int AttrChan = StringSwitch<int>(Chan) 4374 .Case(".x", 0) 4375 .Case(".y", 1) 4376 .Case(".z", 2) 4377 .Case(".w", 3) 4378 .Default(-1); 4379 if (AttrChan == -1) 4380 return MatchOperand_ParseFail; 4381 4382 Str = Str.drop_back(2).drop_front(4); 4383 4384 uint8_t Attr; 4385 if (Str.getAsInteger(10, Attr)) 4386 return MatchOperand_ParseFail; 4387 4388 SMLoc S = Parser.getTok().getLoc(); 4389 Parser.Lex(); 4390 if (Attr > 63) { 4391 Error(S, "out of bounds attr"); 4392 return MatchOperand_Success; 4393 } 4394 4395 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 4396 4397 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 4398 AMDGPUOperand::ImmTyInterpAttr)); 4399 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 4400 AMDGPUOperand::ImmTyAttrChan)); 4401 return MatchOperand_Success; 4402 } 4403 4404 void AMDGPUAsmParser::errorExpTgt() { 4405 Error(Parser.getTok().getLoc(), "invalid exp target"); 4406 } 4407 4408 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 4409 uint8_t &Val) { 4410 if (Str == "null") { 4411 Val = 9; 4412 return MatchOperand_Success; 4413 } 4414 4415 if (Str.startswith("mrt")) { 4416 Str = Str.drop_front(3); 4417 if (Str == "z") { // == mrtz 4418 Val = 8; 4419 return MatchOperand_Success; 4420 } 4421 4422 if (Str.getAsInteger(10, Val)) 4423 return MatchOperand_ParseFail; 4424 4425 if (Val > 7) 4426 errorExpTgt(); 4427 4428 return MatchOperand_Success; 4429 } 4430 4431 if (Str.startswith("pos")) { 4432 Str = Str.drop_front(3); 4433 if (Str.getAsInteger(10, Val)) 4434 return MatchOperand_ParseFail; 4435 4436 if (Val > 3) 4437 errorExpTgt(); 4438 4439 Val += 12; 4440 return MatchOperand_Success; 4441 } 4442 4443 if (Str.startswith("param")) { 4444 Str = Str.drop_front(5); 4445 if (Str.getAsInteger(10, Val)) 4446 return MatchOperand_ParseFail; 4447 4448 if (Val >= 32) 4449 errorExpTgt(); 4450 4451 Val += 32; 4452 return MatchOperand_Success; 4453 } 4454 4455 if (Str.startswith("invalid_target_")) { 4456 Str = Str.drop_front(15); 4457 if (Str.getAsInteger(10, Val)) 4458 return MatchOperand_ParseFail; 4459 4460 errorExpTgt(); 4461 return MatchOperand_Success; 4462 } 4463 4464 return MatchOperand_NoMatch; 4465 } 4466 4467 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 4468 uint8_t Val; 4469 StringRef Str = Parser.getTok().getString(); 4470 4471 auto Res = parseExpTgtImpl(Str, Val); 4472 if (Res != MatchOperand_Success) 4473 return Res; 4474 4475 SMLoc S = Parser.getTok().getLoc(); 4476 Parser.Lex(); 4477 4478 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 4479 AMDGPUOperand::ImmTyExpTgt)); 4480 return MatchOperand_Success; 4481 } 4482 4483 OperandMatchResultTy 4484 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 4485 using namespace llvm::AMDGPU::SendMsg; 4486 4487 int64_t Imm16Val = 0; 4488 SMLoc S = Parser.getTok().getLoc(); 4489 4490 switch(getLexer().getKind()) { 4491 default: 4492 return MatchOperand_NoMatch; 4493 case AsmToken::Integer: 4494 // The operand can be an integer value. 4495 if (getParser().parseAbsoluteExpression(Imm16Val)) 4496 return MatchOperand_NoMatch; 4497 if (Imm16Val < 0 || !isUInt<16>(Imm16Val)) { 4498 Error(S, "invalid immediate: only 16-bit values are legal"); 4499 // Do not return error code, but create an imm operand anyway and proceed 4500 // to the next operand, if any. That avoids unneccessary error messages. 4501 } 4502 break; 4503 case AsmToken::Identifier: { 4504 OperandInfoTy Msg(ID_UNKNOWN_); 4505 OperandInfoTy Operation(OP_UNKNOWN_); 4506 int64_t StreamId = STREAM_ID_DEFAULT_; 4507 if (parseSendMsgConstruct(Msg, Operation, StreamId)) 4508 return MatchOperand_ParseFail; 4509 do { 4510 // Validate and encode message ID. 4511 if (! ((ID_INTERRUPT <= Msg.Id && Msg.Id <= ID_GS_DONE) 4512 || Msg.Id == ID_SYSMSG)) { 4513 if (Msg.IsSymbolic) 4514 Error(S, "invalid/unsupported symbolic name of message"); 4515 else 4516 Error(S, "invalid/unsupported code of message"); 4517 break; 4518 } 4519 Imm16Val = (Msg.Id << ID_SHIFT_); 4520 // Validate and encode operation ID. 4521 if (Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) { 4522 if (! (OP_GS_FIRST_ <= Operation.Id && Operation.Id < OP_GS_LAST_)) { 4523 if (Operation.IsSymbolic) 4524 Error(S, "invalid symbolic name of GS_OP"); 4525 else 4526 Error(S, "invalid code of GS_OP: only 2-bit values are legal"); 4527 break; 4528 } 4529 if (Operation.Id == OP_GS_NOP 4530 && Msg.Id != ID_GS_DONE) { 4531 Error(S, "invalid GS_OP: NOP is for GS_DONE only"); 4532 break; 4533 } 4534 Imm16Val |= (Operation.Id << OP_SHIFT_); 4535 } 4536 if (Msg.Id == ID_SYSMSG) { 4537 if (! (OP_SYS_FIRST_ <= Operation.Id && Operation.Id < OP_SYS_LAST_)) { 4538 if (Operation.IsSymbolic) 4539 Error(S, "invalid/unsupported symbolic name of SYSMSG_OP"); 4540 else 4541 Error(S, "invalid/unsupported code of SYSMSG_OP"); 4542 break; 4543 } 4544 Imm16Val |= (Operation.Id << OP_SHIFT_); 4545 } 4546 // Validate and encode stream ID. 4547 if ((Msg.Id == ID_GS || Msg.Id == ID_GS_DONE) && Operation.Id != OP_GS_NOP) { 4548 if (! (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_)) { 4549 Error(S, "invalid stream id: only 2-bit values are legal"); 4550 break; 4551 } 4552 Imm16Val |= (StreamId << STREAM_ID_SHIFT_); 4553 } 4554 } while (false); 4555 } 4556 break; 4557 } 4558 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg)); 4559 return MatchOperand_Success; 4560 } 4561 4562 bool AMDGPUOperand::isSendMsg() const { 4563 return isImmTy(ImmTySendMsg); 4564 } 4565 4566 //===----------------------------------------------------------------------===// 4567 // parser helpers 4568 //===----------------------------------------------------------------------===// 4569 4570 bool 4571 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 4572 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 4573 } 4574 4575 bool 4576 AMDGPUAsmParser::isId(const StringRef Id) const { 4577 return isId(getToken(), Id); 4578 } 4579 4580 bool 4581 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 4582 return getTokenKind() == Kind; 4583 } 4584 4585 bool 4586 AMDGPUAsmParser::trySkipId(const StringRef Id) { 4587 if (isId(Id)) { 4588 lex(); 4589 return true; 4590 } 4591 return false; 4592 } 4593 4594 bool 4595 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 4596 if (isToken(Kind)) { 4597 lex(); 4598 return true; 4599 } 4600 return false; 4601 } 4602 4603 bool 4604 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 4605 const StringRef ErrMsg) { 4606 if (!trySkipToken(Kind)) { 4607 Error(getLoc(), ErrMsg); 4608 return false; 4609 } 4610 return true; 4611 } 4612 4613 bool 4614 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 4615 return !getParser().parseAbsoluteExpression(Imm); 4616 } 4617 4618 bool 4619 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 4620 if (isToken(AsmToken::String)) { 4621 Val = getToken().getStringContents(); 4622 lex(); 4623 return true; 4624 } else { 4625 Error(getLoc(), ErrMsg); 4626 return false; 4627 } 4628 } 4629 4630 AsmToken 4631 AMDGPUAsmParser::getToken() const { 4632 return Parser.getTok(); 4633 } 4634 4635 AsmToken 4636 AMDGPUAsmParser::peekToken() { 4637 return getLexer().peekTok(); 4638 } 4639 4640 AsmToken::TokenKind 4641 AMDGPUAsmParser::getTokenKind() const { 4642 return getLexer().getKind(); 4643 } 4644 4645 SMLoc 4646 AMDGPUAsmParser::getLoc() const { 4647 return getToken().getLoc(); 4648 } 4649 4650 StringRef 4651 AMDGPUAsmParser::getTokenStr() const { 4652 return getToken().getString(); 4653 } 4654 4655 void 4656 AMDGPUAsmParser::lex() { 4657 Parser.Lex(); 4658 } 4659 4660 //===----------------------------------------------------------------------===// 4661 // swizzle 4662 //===----------------------------------------------------------------------===// 4663 4664 LLVM_READNONE 4665 static unsigned 4666 encodeBitmaskPerm(const unsigned AndMask, 4667 const unsigned OrMask, 4668 const unsigned XorMask) { 4669 using namespace llvm::AMDGPU::Swizzle; 4670 4671 return BITMASK_PERM_ENC | 4672 (AndMask << BITMASK_AND_SHIFT) | 4673 (OrMask << BITMASK_OR_SHIFT) | 4674 (XorMask << BITMASK_XOR_SHIFT); 4675 } 4676 4677 bool 4678 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 4679 const unsigned MinVal, 4680 const unsigned MaxVal, 4681 const StringRef ErrMsg) { 4682 for (unsigned i = 0; i < OpNum; ++i) { 4683 if (!skipToken(AsmToken::Comma, "expected a comma")){ 4684 return false; 4685 } 4686 SMLoc ExprLoc = Parser.getTok().getLoc(); 4687 if (!parseExpr(Op[i])) { 4688 return false; 4689 } 4690 if (Op[i] < MinVal || Op[i] > MaxVal) { 4691 Error(ExprLoc, ErrMsg); 4692 return false; 4693 } 4694 } 4695 4696 return true; 4697 } 4698 4699 bool 4700 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 4701 using namespace llvm::AMDGPU::Swizzle; 4702 4703 int64_t Lane[LANE_NUM]; 4704 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 4705 "expected a 2-bit lane id")) { 4706 Imm = QUAD_PERM_ENC; 4707 for (unsigned I = 0; I < LANE_NUM; ++I) { 4708 Imm |= Lane[I] << (LANE_SHIFT * I); 4709 } 4710 return true; 4711 } 4712 return false; 4713 } 4714 4715 bool 4716 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 4717 using namespace llvm::AMDGPU::Swizzle; 4718 4719 SMLoc S = Parser.getTok().getLoc(); 4720 int64_t GroupSize; 4721 int64_t LaneIdx; 4722 4723 if (!parseSwizzleOperands(1, &GroupSize, 4724 2, 32, 4725 "group size must be in the interval [2,32]")) { 4726 return false; 4727 } 4728 if (!isPowerOf2_64(GroupSize)) { 4729 Error(S, "group size must be a power of two"); 4730 return false; 4731 } 4732 if (parseSwizzleOperands(1, &LaneIdx, 4733 0, GroupSize - 1, 4734 "lane id must be in the interval [0,group size - 1]")) { 4735 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 4736 return true; 4737 } 4738 return false; 4739 } 4740 4741 bool 4742 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 4743 using namespace llvm::AMDGPU::Swizzle; 4744 4745 SMLoc S = Parser.getTok().getLoc(); 4746 int64_t GroupSize; 4747 4748 if (!parseSwizzleOperands(1, &GroupSize, 4749 2, 32, "group size must be in the interval [2,32]")) { 4750 return false; 4751 } 4752 if (!isPowerOf2_64(GroupSize)) { 4753 Error(S, "group size must be a power of two"); 4754 return false; 4755 } 4756 4757 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 4758 return true; 4759 } 4760 4761 bool 4762 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 4763 using namespace llvm::AMDGPU::Swizzle; 4764 4765 SMLoc S = Parser.getTok().getLoc(); 4766 int64_t GroupSize; 4767 4768 if (!parseSwizzleOperands(1, &GroupSize, 4769 1, 16, "group size must be in the interval [1,16]")) { 4770 return false; 4771 } 4772 if (!isPowerOf2_64(GroupSize)) { 4773 Error(S, "group size must be a power of two"); 4774 return false; 4775 } 4776 4777 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 4778 return true; 4779 } 4780 4781 bool 4782 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 4783 using namespace llvm::AMDGPU::Swizzle; 4784 4785 if (!skipToken(AsmToken::Comma, "expected a comma")) { 4786 return false; 4787 } 4788 4789 StringRef Ctl; 4790 SMLoc StrLoc = Parser.getTok().getLoc(); 4791 if (!parseString(Ctl)) { 4792 return false; 4793 } 4794 if (Ctl.size() != BITMASK_WIDTH) { 4795 Error(StrLoc, "expected a 5-character mask"); 4796 return false; 4797 } 4798 4799 unsigned AndMask = 0; 4800 unsigned OrMask = 0; 4801 unsigned XorMask = 0; 4802 4803 for (size_t i = 0; i < Ctl.size(); ++i) { 4804 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 4805 switch(Ctl[i]) { 4806 default: 4807 Error(StrLoc, "invalid mask"); 4808 return false; 4809 case '0': 4810 break; 4811 case '1': 4812 OrMask |= Mask; 4813 break; 4814 case 'p': 4815 AndMask |= Mask; 4816 break; 4817 case 'i': 4818 AndMask |= Mask; 4819 XorMask |= Mask; 4820 break; 4821 } 4822 } 4823 4824 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 4825 return true; 4826 } 4827 4828 bool 4829 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 4830 4831 SMLoc OffsetLoc = Parser.getTok().getLoc(); 4832 4833 if (!parseExpr(Imm)) { 4834 return false; 4835 } 4836 if (!isUInt<16>(Imm)) { 4837 Error(OffsetLoc, "expected a 16-bit offset"); 4838 return false; 4839 } 4840 return true; 4841 } 4842 4843 bool 4844 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 4845 using namespace llvm::AMDGPU::Swizzle; 4846 4847 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 4848 4849 SMLoc ModeLoc = Parser.getTok().getLoc(); 4850 bool Ok = false; 4851 4852 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 4853 Ok = parseSwizzleQuadPerm(Imm); 4854 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 4855 Ok = parseSwizzleBitmaskPerm(Imm); 4856 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 4857 Ok = parseSwizzleBroadcast(Imm); 4858 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 4859 Ok = parseSwizzleSwap(Imm); 4860 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 4861 Ok = parseSwizzleReverse(Imm); 4862 } else { 4863 Error(ModeLoc, "expected a swizzle mode"); 4864 } 4865 4866 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 4867 } 4868 4869 return false; 4870 } 4871 4872 OperandMatchResultTy 4873 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 4874 SMLoc S = Parser.getTok().getLoc(); 4875 int64_t Imm = 0; 4876 4877 if (trySkipId("offset")) { 4878 4879 bool Ok = false; 4880 if (skipToken(AsmToken::Colon, "expected a colon")) { 4881 if (trySkipId("swizzle")) { 4882 Ok = parseSwizzleMacro(Imm); 4883 } else { 4884 Ok = parseSwizzleOffset(Imm); 4885 } 4886 } 4887 4888 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 4889 4890 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 4891 } else { 4892 // Swizzle "offset" operand is optional. 4893 // If it is omitted, try parsing other optional operands. 4894 return parseOptionalOpr(Operands); 4895 } 4896 } 4897 4898 bool 4899 AMDGPUOperand::isSwizzle() const { 4900 return isImmTy(ImmTySwizzle); 4901 } 4902 4903 //===----------------------------------------------------------------------===// 4904 // VGPR Index Mode 4905 //===----------------------------------------------------------------------===// 4906 4907 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 4908 4909 using namespace llvm::AMDGPU::VGPRIndexMode; 4910 4911 if (trySkipToken(AsmToken::RParen)) { 4912 return OFF; 4913 } 4914 4915 int64_t Imm = 0; 4916 4917 while (true) { 4918 unsigned Mode = 0; 4919 SMLoc S = Parser.getTok().getLoc(); 4920 4921 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 4922 if (trySkipId(IdSymbolic[ModeId])) { 4923 Mode = 1 << ModeId; 4924 break; 4925 } 4926 } 4927 4928 if (Mode == 0) { 4929 Error(S, (Imm == 0)? 4930 "expected a VGPR index mode or a closing parenthesis" : 4931 "expected a VGPR index mode"); 4932 break; 4933 } 4934 4935 if (Imm & Mode) { 4936 Error(S, "duplicate VGPR index mode"); 4937 break; 4938 } 4939 Imm |= Mode; 4940 4941 if (trySkipToken(AsmToken::RParen)) 4942 break; 4943 if (!skipToken(AsmToken::Comma, 4944 "expected a comma or a closing parenthesis")) 4945 break; 4946 } 4947 4948 return Imm; 4949 } 4950 4951 OperandMatchResultTy 4952 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 4953 4954 int64_t Imm = 0; 4955 SMLoc S = Parser.getTok().getLoc(); 4956 4957 if (getLexer().getKind() == AsmToken::Identifier && 4958 Parser.getTok().getString() == "gpr_idx" && 4959 getLexer().peekTok().is(AsmToken::LParen)) { 4960 4961 Parser.Lex(); 4962 Parser.Lex(); 4963 4964 // If parse failed, trigger an error but do not return error code 4965 // to avoid excessive error messages. 4966 Imm = parseGPRIdxMacro(); 4967 4968 } else { 4969 if (getParser().parseAbsoluteExpression(Imm)) 4970 return MatchOperand_NoMatch; 4971 if (Imm < 0 || !isUInt<4>(Imm)) { 4972 Error(S, "invalid immediate: only 4-bit values are legal"); 4973 } 4974 } 4975 4976 Operands.push_back( 4977 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 4978 return MatchOperand_Success; 4979 } 4980 4981 bool AMDGPUOperand::isGPRIdxMode() const { 4982 return isImmTy(ImmTyGprIdxMode); 4983 } 4984 4985 //===----------------------------------------------------------------------===// 4986 // sopp branch targets 4987 //===----------------------------------------------------------------------===// 4988 4989 OperandMatchResultTy 4990 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 4991 SMLoc S = Parser.getTok().getLoc(); 4992 4993 switch (getLexer().getKind()) { 4994 default: return MatchOperand_ParseFail; 4995 case AsmToken::Integer: { 4996 int64_t Imm; 4997 if (getParser().parseAbsoluteExpression(Imm)) 4998 return MatchOperand_ParseFail; 4999 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S)); 5000 return MatchOperand_Success; 5001 } 5002 5003 case AsmToken::Identifier: 5004 Operands.push_back(AMDGPUOperand::CreateExpr(this, 5005 MCSymbolRefExpr::create(getContext().getOrCreateSymbol( 5006 Parser.getTok().getString()), getContext()), S)); 5007 Parser.Lex(); 5008 return MatchOperand_Success; 5009 } 5010 } 5011 5012 //===----------------------------------------------------------------------===// 5013 // mubuf 5014 //===----------------------------------------------------------------------===// 5015 5016 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5017 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5018 } 5019 5020 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5021 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5022 } 5023 5024 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5025 const OperandVector &Operands, 5026 bool IsAtomic, 5027 bool IsAtomicReturn, 5028 bool IsLds) { 5029 bool IsLdsOpcode = IsLds; 5030 bool HasLdsModifier = false; 5031 OptionalImmIndexMap OptionalIdx; 5032 assert(IsAtomicReturn ? IsAtomic : true); 5033 unsigned FirstOperandIdx = 1; 5034 5035 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5036 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5037 5038 // Add the register arguments 5039 if (Op.isReg()) { 5040 Op.addRegOperands(Inst, 1); 5041 // Insert a tied src for atomic return dst. 5042 // This cannot be postponed as subsequent calls to 5043 // addImmOperands rely on correct number of MC operands. 5044 if (IsAtomicReturn && i == FirstOperandIdx) 5045 Op.addRegOperands(Inst, 1); 5046 continue; 5047 } 5048 5049 // Handle the case where soffset is an immediate 5050 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5051 Op.addImmOperands(Inst, 1); 5052 continue; 5053 } 5054 5055 HasLdsModifier = Op.isLDS(); 5056 5057 // Handle tokens like 'offen' which are sometimes hard-coded into the 5058 // asm string. There are no MCInst operands for these. 5059 if (Op.isToken()) { 5060 continue; 5061 } 5062 assert(Op.isImm()); 5063 5064 // Handle optional arguments 5065 OptionalIdx[Op.getImmTy()] = i; 5066 } 5067 5068 // This is a workaround for an llvm quirk which may result in an 5069 // incorrect instruction selection. Lds and non-lds versions of 5070 // MUBUF instructions are identical except that lds versions 5071 // have mandatory 'lds' modifier. However this modifier follows 5072 // optional modifiers and llvm asm matcher regards this 'lds' 5073 // modifier as an optional one. As a result, an lds version 5074 // of opcode may be selected even if it has no 'lds' modifier. 5075 if (IsLdsOpcode && !HasLdsModifier) { 5076 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5077 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5078 Inst.setOpcode(NoLdsOpcode); 5079 IsLdsOpcode = false; 5080 } 5081 } 5082 5083 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5084 if (!IsAtomic) { // glc is hard-coded. 5085 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5086 } 5087 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5088 5089 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5090 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5091 } 5092 } 5093 5094 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5095 OptionalImmIndexMap OptionalIdx; 5096 5097 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5098 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5099 5100 // Add the register arguments 5101 if (Op.isReg()) { 5102 Op.addRegOperands(Inst, 1); 5103 continue; 5104 } 5105 5106 // Handle the case where soffset is an immediate 5107 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5108 Op.addImmOperands(Inst, 1); 5109 continue; 5110 } 5111 5112 // Handle tokens like 'offen' which are sometimes hard-coded into the 5113 // asm string. There are no MCInst operands for these. 5114 if (Op.isToken()) { 5115 continue; 5116 } 5117 assert(Op.isImm()); 5118 5119 // Handle optional arguments 5120 OptionalIdx[Op.getImmTy()] = i; 5121 } 5122 5123 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5124 AMDGPUOperand::ImmTyOffset); 5125 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5126 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5127 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5128 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5129 } 5130 5131 //===----------------------------------------------------------------------===// 5132 // mimg 5133 //===----------------------------------------------------------------------===// 5134 5135 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5136 bool IsAtomic) { 5137 unsigned I = 1; 5138 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5139 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5140 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5141 } 5142 5143 if (IsAtomic) { 5144 // Add src, same as dst 5145 assert(Desc.getNumDefs() == 1); 5146 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5147 } 5148 5149 OptionalImmIndexMap OptionalIdx; 5150 5151 for (unsigned E = Operands.size(); I != E; ++I) { 5152 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5153 5154 // Add the register arguments 5155 if (Op.isReg()) { 5156 Op.addRegOperands(Inst, 1); 5157 } else if (Op.isImmModifier()) { 5158 OptionalIdx[Op.getImmTy()] = I; 5159 } else { 5160 llvm_unreachable("unexpected operand type"); 5161 } 5162 } 5163 5164 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5165 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5166 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5167 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5168 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5169 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5170 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5171 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5172 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5173 } 5174 5175 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5176 cvtMIMG(Inst, Operands, true); 5177 } 5178 5179 //===----------------------------------------------------------------------===// 5180 // smrd 5181 //===----------------------------------------------------------------------===// 5182 5183 bool AMDGPUOperand::isSMRDOffset8() const { 5184 return isImm() && isUInt<8>(getImm()); 5185 } 5186 5187 bool AMDGPUOperand::isSMRDOffset20() const { 5188 return isImm() && isUInt<20>(getImm()); 5189 } 5190 5191 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5192 // 32-bit literals are only supported on CI and we only want to use them 5193 // when the offset is > 8-bits. 5194 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5195 } 5196 5197 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5198 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5199 } 5200 5201 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5202 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5203 } 5204 5205 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5206 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5207 } 5208 5209 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetU12() const { 5210 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5211 } 5212 5213 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultOffsetS13() const { 5214 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5215 } 5216 5217 //===----------------------------------------------------------------------===// 5218 // vop3 5219 //===----------------------------------------------------------------------===// 5220 5221 static bool ConvertOmodMul(int64_t &Mul) { 5222 if (Mul != 1 && Mul != 2 && Mul != 4) 5223 return false; 5224 5225 Mul >>= 1; 5226 return true; 5227 } 5228 5229 static bool ConvertOmodDiv(int64_t &Div) { 5230 if (Div == 1) { 5231 Div = 0; 5232 return true; 5233 } 5234 5235 if (Div == 2) { 5236 Div = 3; 5237 return true; 5238 } 5239 5240 return false; 5241 } 5242 5243 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5244 if (BoundCtrl == 0) { 5245 BoundCtrl = 1; 5246 return true; 5247 } 5248 5249 if (BoundCtrl == -1) { 5250 BoundCtrl = 0; 5251 return true; 5252 } 5253 5254 return false; 5255 } 5256 5257 // Note: the order in this table matches the order of operands in AsmString. 5258 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5259 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5260 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5261 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5262 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5263 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5264 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5265 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5266 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5267 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5268 {"dfmt", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5269 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5270 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5271 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5272 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5273 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5274 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5275 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5276 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5277 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5278 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5279 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5280 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5281 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5282 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5283 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5284 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5285 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5286 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5287 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5288 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5289 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5290 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5291 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5292 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5293 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5294 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5295 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr} 5296 }; 5297 5298 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 5299 unsigned size = Operands.size(); 5300 assert(size > 0); 5301 5302 OperandMatchResultTy res = parseOptionalOpr(Operands); 5303 5304 // This is a hack to enable hardcoded mandatory operands which follow 5305 // optional operands. 5306 // 5307 // Current design assumes that all operands after the first optional operand 5308 // are also optional. However implementation of some instructions violates 5309 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 5310 // 5311 // To alleviate this problem, we have to (implicitly) parse extra operands 5312 // to make sure autogenerated parser of custom operands never hit hardcoded 5313 // mandatory operands. 5314 5315 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 5316 5317 // We have parsed the first optional operand. 5318 // Parse as many operands as necessary to skip all mandatory operands. 5319 5320 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 5321 if (res != MatchOperand_Success || 5322 getLexer().is(AsmToken::EndOfStatement)) break; 5323 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 5324 res = parseOptionalOpr(Operands); 5325 } 5326 } 5327 5328 return res; 5329 } 5330 5331 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 5332 OperandMatchResultTy res; 5333 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 5334 // try to parse any optional operand here 5335 if (Op.IsBit) { 5336 res = parseNamedBit(Op.Name, Operands, Op.Type); 5337 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 5338 res = parseOModOperand(Operands); 5339 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 5340 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 5341 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 5342 res = parseSDWASel(Operands, Op.Name, Op.Type); 5343 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 5344 res = parseSDWADstUnused(Operands); 5345 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 5346 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 5347 Op.Type == AMDGPUOperand::ImmTyNegLo || 5348 Op.Type == AMDGPUOperand::ImmTyNegHi) { 5349 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 5350 Op.ConvertResult); 5351 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT) { 5352 res = parseDfmtNfmt(Operands); 5353 } else { 5354 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 5355 } 5356 if (res != MatchOperand_NoMatch) { 5357 return res; 5358 } 5359 } 5360 return MatchOperand_NoMatch; 5361 } 5362 5363 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 5364 StringRef Name = Parser.getTok().getString(); 5365 if (Name == "mul") { 5366 return parseIntWithPrefix("mul", Operands, 5367 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 5368 } 5369 5370 if (Name == "div") { 5371 return parseIntWithPrefix("div", Operands, 5372 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 5373 } 5374 5375 return MatchOperand_NoMatch; 5376 } 5377 5378 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 5379 cvtVOP3P(Inst, Operands); 5380 5381 int Opc = Inst.getOpcode(); 5382 5383 int SrcNum; 5384 const int Ops[] = { AMDGPU::OpName::src0, 5385 AMDGPU::OpName::src1, 5386 AMDGPU::OpName::src2 }; 5387 for (SrcNum = 0; 5388 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 5389 ++SrcNum); 5390 assert(SrcNum > 0); 5391 5392 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5393 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5394 5395 if ((OpSel & (1 << SrcNum)) != 0) { 5396 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 5397 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 5398 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 5399 } 5400 } 5401 5402 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 5403 // 1. This operand is input modifiers 5404 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 5405 // 2. This is not last operand 5406 && Desc.NumOperands > (OpNum + 1) 5407 // 3. Next operand is register class 5408 && Desc.OpInfo[OpNum + 1].RegClass != -1 5409 // 4. Next register is not tied to any other operand 5410 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 5411 } 5412 5413 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 5414 { 5415 OptionalImmIndexMap OptionalIdx; 5416 unsigned Opc = Inst.getOpcode(); 5417 5418 unsigned I = 1; 5419 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5420 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5421 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5422 } 5423 5424 for (unsigned E = Operands.size(); I != E; ++I) { 5425 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5426 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5427 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5428 } else if (Op.isInterpSlot() || 5429 Op.isInterpAttr() || 5430 Op.isAttrChan()) { 5431 Inst.addOperand(MCOperand::createImm(Op.Imm.Val)); 5432 } else if (Op.isImmModifier()) { 5433 OptionalIdx[Op.getImmTy()] = I; 5434 } else { 5435 llvm_unreachable("unhandled operand type"); 5436 } 5437 } 5438 5439 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 5440 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 5441 } 5442 5443 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5444 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5445 } 5446 5447 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5448 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5449 } 5450 } 5451 5452 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 5453 OptionalImmIndexMap &OptionalIdx) { 5454 unsigned Opc = Inst.getOpcode(); 5455 5456 unsigned I = 1; 5457 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5458 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5459 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5460 } 5461 5462 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 5463 // This instruction has src modifiers 5464 for (unsigned E = Operands.size(); I != E; ++I) { 5465 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5466 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5467 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 5468 } else if (Op.isImmModifier()) { 5469 OptionalIdx[Op.getImmTy()] = I; 5470 } else if (Op.isRegOrImm()) { 5471 Op.addRegOrImmOperands(Inst, 1); 5472 } else { 5473 llvm_unreachable("unhandled operand type"); 5474 } 5475 } 5476 } else { 5477 // No src modifiers 5478 for (unsigned E = Operands.size(); I != E; ++I) { 5479 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5480 if (Op.isMod()) { 5481 OptionalIdx[Op.getImmTy()] = I; 5482 } else { 5483 Op.addRegOrImmOperands(Inst, 1); 5484 } 5485 } 5486 } 5487 5488 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 5489 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 5490 } 5491 5492 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 5493 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 5494 } 5495 5496 // Special case v_mac_{f16, f32} and v_fmac_f32 (gfx906): 5497 // it has src2 register operand that is tied to dst operand 5498 // we don't allow modifiers for this operand in assembler so src2_modifiers 5499 // should be 0. 5500 if (Opc == AMDGPU::V_MAC_F32_e64_si || 5501 Opc == AMDGPU::V_MAC_F32_e64_vi || 5502 Opc == AMDGPU::V_MAC_F16_e64_vi || 5503 Opc == AMDGPU::V_FMAC_F32_e64_vi) { 5504 auto it = Inst.begin(); 5505 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 5506 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 5507 ++it; 5508 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5509 } 5510 } 5511 5512 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 5513 OptionalImmIndexMap OptionalIdx; 5514 cvtVOP3(Inst, Operands, OptionalIdx); 5515 } 5516 5517 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 5518 const OperandVector &Operands) { 5519 OptionalImmIndexMap OptIdx; 5520 const int Opc = Inst.getOpcode(); 5521 const MCInstrDesc &Desc = MII.get(Opc); 5522 5523 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 5524 5525 cvtVOP3(Inst, Operands, OptIdx); 5526 5527 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 5528 assert(!IsPacked); 5529 Inst.addOperand(Inst.getOperand(0)); 5530 } 5531 5532 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 5533 // instruction, and then figure out where to actually put the modifiers 5534 5535 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 5536 5537 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 5538 if (OpSelHiIdx != -1) { 5539 int DefaultVal = IsPacked ? -1 : 0; 5540 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 5541 DefaultVal); 5542 } 5543 5544 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 5545 if (NegLoIdx != -1) { 5546 assert(IsPacked); 5547 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 5548 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 5549 } 5550 5551 const int Ops[] = { AMDGPU::OpName::src0, 5552 AMDGPU::OpName::src1, 5553 AMDGPU::OpName::src2 }; 5554 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 5555 AMDGPU::OpName::src1_modifiers, 5556 AMDGPU::OpName::src2_modifiers }; 5557 5558 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 5559 5560 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 5561 unsigned OpSelHi = 0; 5562 unsigned NegLo = 0; 5563 unsigned NegHi = 0; 5564 5565 if (OpSelHiIdx != -1) { 5566 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 5567 } 5568 5569 if (NegLoIdx != -1) { 5570 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 5571 NegLo = Inst.getOperand(NegLoIdx).getImm(); 5572 NegHi = Inst.getOperand(NegHiIdx).getImm(); 5573 } 5574 5575 for (int J = 0; J < 3; ++J) { 5576 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 5577 if (OpIdx == -1) 5578 break; 5579 5580 uint32_t ModVal = 0; 5581 5582 if ((OpSel & (1 << J)) != 0) 5583 ModVal |= SISrcMods::OP_SEL_0; 5584 5585 if ((OpSelHi & (1 << J)) != 0) 5586 ModVal |= SISrcMods::OP_SEL_1; 5587 5588 if ((NegLo & (1 << J)) != 0) 5589 ModVal |= SISrcMods::NEG; 5590 5591 if ((NegHi & (1 << J)) != 0) 5592 ModVal |= SISrcMods::NEG_HI; 5593 5594 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 5595 5596 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 5597 } 5598 } 5599 5600 //===----------------------------------------------------------------------===// 5601 // dpp 5602 //===----------------------------------------------------------------------===// 5603 5604 bool AMDGPUOperand::isDPPCtrl() const { 5605 using namespace AMDGPU::DPP; 5606 5607 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 5608 if (result) { 5609 int64_t Imm = getImm(); 5610 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 5611 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 5612 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 5613 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 5614 (Imm == DppCtrl::WAVE_SHL1) || 5615 (Imm == DppCtrl::WAVE_ROL1) || 5616 (Imm == DppCtrl::WAVE_SHR1) || 5617 (Imm == DppCtrl::WAVE_ROR1) || 5618 (Imm == DppCtrl::ROW_MIRROR) || 5619 (Imm == DppCtrl::ROW_HALF_MIRROR) || 5620 (Imm == DppCtrl::BCAST15) || 5621 (Imm == DppCtrl::BCAST31); 5622 } 5623 return false; 5624 } 5625 5626 bool AMDGPUOperand::isS16Imm() const { 5627 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 5628 } 5629 5630 bool AMDGPUOperand::isU16Imm() const { 5631 return isImm() && isUInt<16>(getImm()); 5632 } 5633 5634 OperandMatchResultTy 5635 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 5636 using namespace AMDGPU::DPP; 5637 5638 SMLoc S = Parser.getTok().getLoc(); 5639 StringRef Prefix; 5640 int64_t Int; 5641 5642 if (getLexer().getKind() == AsmToken::Identifier) { 5643 Prefix = Parser.getTok().getString(); 5644 } else { 5645 return MatchOperand_NoMatch; 5646 } 5647 5648 if (Prefix == "row_mirror") { 5649 Int = DppCtrl::ROW_MIRROR; 5650 Parser.Lex(); 5651 } else if (Prefix == "row_half_mirror") { 5652 Int = DppCtrl::ROW_HALF_MIRROR; 5653 Parser.Lex(); 5654 } else { 5655 // Check to prevent parseDPPCtrlOps from eating invalid tokens 5656 if (Prefix != "quad_perm" 5657 && Prefix != "row_shl" 5658 && Prefix != "row_shr" 5659 && Prefix != "row_ror" 5660 && Prefix != "wave_shl" 5661 && Prefix != "wave_rol" 5662 && Prefix != "wave_shr" 5663 && Prefix != "wave_ror" 5664 && Prefix != "row_bcast") { 5665 return MatchOperand_NoMatch; 5666 } 5667 5668 Parser.Lex(); 5669 if (getLexer().isNot(AsmToken::Colon)) 5670 return MatchOperand_ParseFail; 5671 5672 if (Prefix == "quad_perm") { 5673 // quad_perm:[%d,%d,%d,%d] 5674 Parser.Lex(); 5675 if (getLexer().isNot(AsmToken::LBrac)) 5676 return MatchOperand_ParseFail; 5677 Parser.Lex(); 5678 5679 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 5680 return MatchOperand_ParseFail; 5681 5682 for (int i = 0; i < 3; ++i) { 5683 if (getLexer().isNot(AsmToken::Comma)) 5684 return MatchOperand_ParseFail; 5685 Parser.Lex(); 5686 5687 int64_t Temp; 5688 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 5689 return MatchOperand_ParseFail; 5690 const int shift = i*2 + 2; 5691 Int += (Temp << shift); 5692 } 5693 5694 if (getLexer().isNot(AsmToken::RBrac)) 5695 return MatchOperand_ParseFail; 5696 Parser.Lex(); 5697 } else { 5698 // sel:%d 5699 Parser.Lex(); 5700 if (getParser().parseAbsoluteExpression(Int)) 5701 return MatchOperand_ParseFail; 5702 5703 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 5704 Int |= DppCtrl::ROW_SHL0; 5705 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 5706 Int |= DppCtrl::ROW_SHR0; 5707 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 5708 Int |= DppCtrl::ROW_ROR0; 5709 } else if (Prefix == "wave_shl" && 1 == Int) { 5710 Int = DppCtrl::WAVE_SHL1; 5711 } else if (Prefix == "wave_rol" && 1 == Int) { 5712 Int = DppCtrl::WAVE_ROL1; 5713 } else if (Prefix == "wave_shr" && 1 == Int) { 5714 Int = DppCtrl::WAVE_SHR1; 5715 } else if (Prefix == "wave_ror" && 1 == Int) { 5716 Int = DppCtrl::WAVE_ROR1; 5717 } else if (Prefix == "row_bcast") { 5718 if (Int == 15) { 5719 Int = DppCtrl::BCAST15; 5720 } else if (Int == 31) { 5721 Int = DppCtrl::BCAST31; 5722 } else { 5723 return MatchOperand_ParseFail; 5724 } 5725 } else { 5726 return MatchOperand_ParseFail; 5727 } 5728 } 5729 } 5730 5731 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 5732 return MatchOperand_Success; 5733 } 5734 5735 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 5736 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 5737 } 5738 5739 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 5740 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 5741 } 5742 5743 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 5744 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 5745 } 5746 5747 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 5748 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 5749 } 5750 5751 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) { 5752 OptionalImmIndexMap OptionalIdx; 5753 5754 unsigned I = 1; 5755 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5756 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5757 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5758 } 5759 5760 for (unsigned E = Operands.size(); I != E; ++I) { 5761 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 5762 MCOI::TIED_TO); 5763 if (TiedTo != -1) { 5764 assert((unsigned)TiedTo < Inst.getNumOperands()); 5765 // handle tied old or src2 for MAC instructions 5766 Inst.addOperand(Inst.getOperand(TiedTo)); 5767 } 5768 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5769 // Add the register arguments 5770 if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { 5771 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 5772 // Skip it. 5773 continue; 5774 } if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5775 Op.addRegWithFPInputModsOperands(Inst, 2); 5776 } else if (Op.isDPPCtrl()) { 5777 Op.addImmOperands(Inst, 1); 5778 } else if (Op.isImm()) { 5779 // Handle optional arguments 5780 OptionalIdx[Op.getImmTy()] = I; 5781 } else { 5782 llvm_unreachable("Invalid operand type"); 5783 } 5784 } 5785 5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 5787 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 5788 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 5789 } 5790 5791 //===----------------------------------------------------------------------===// 5792 // sdwa 5793 //===----------------------------------------------------------------------===// 5794 5795 OperandMatchResultTy 5796 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 5797 AMDGPUOperand::ImmTy Type) { 5798 using namespace llvm::AMDGPU::SDWA; 5799 5800 SMLoc S = Parser.getTok().getLoc(); 5801 StringRef Value; 5802 OperandMatchResultTy res; 5803 5804 res = parseStringWithPrefix(Prefix, Value); 5805 if (res != MatchOperand_Success) { 5806 return res; 5807 } 5808 5809 int64_t Int; 5810 Int = StringSwitch<int64_t>(Value) 5811 .Case("BYTE_0", SdwaSel::BYTE_0) 5812 .Case("BYTE_1", SdwaSel::BYTE_1) 5813 .Case("BYTE_2", SdwaSel::BYTE_2) 5814 .Case("BYTE_3", SdwaSel::BYTE_3) 5815 .Case("WORD_0", SdwaSel::WORD_0) 5816 .Case("WORD_1", SdwaSel::WORD_1) 5817 .Case("DWORD", SdwaSel::DWORD) 5818 .Default(0xffffffff); 5819 Parser.Lex(); // eat last token 5820 5821 if (Int == 0xffffffff) { 5822 return MatchOperand_ParseFail; 5823 } 5824 5825 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 5826 return MatchOperand_Success; 5827 } 5828 5829 OperandMatchResultTy 5830 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 5831 using namespace llvm::AMDGPU::SDWA; 5832 5833 SMLoc S = Parser.getTok().getLoc(); 5834 StringRef Value; 5835 OperandMatchResultTy res; 5836 5837 res = parseStringWithPrefix("dst_unused", Value); 5838 if (res != MatchOperand_Success) { 5839 return res; 5840 } 5841 5842 int64_t Int; 5843 Int = StringSwitch<int64_t>(Value) 5844 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 5845 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 5846 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 5847 .Default(0xffffffff); 5848 Parser.Lex(); // eat last token 5849 5850 if (Int == 0xffffffff) { 5851 return MatchOperand_ParseFail; 5852 } 5853 5854 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 5855 return MatchOperand_Success; 5856 } 5857 5858 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 5859 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 5860 } 5861 5862 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 5863 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 5864 } 5865 5866 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 5867 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 5868 } 5869 5870 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 5871 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 5872 } 5873 5874 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 5875 uint64_t BasicInstType, bool skipVcc) { 5876 using namespace llvm::AMDGPU::SDWA; 5877 5878 OptionalImmIndexMap OptionalIdx; 5879 bool skippedVcc = false; 5880 5881 unsigned I = 1; 5882 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5883 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5884 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5885 } 5886 5887 for (unsigned E = Operands.size(); I != E; ++I) { 5888 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5889 if (skipVcc && !skippedVcc && Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) { 5890 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 5891 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 5892 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 5893 // Skip VCC only if we didn't skip it on previous iteration. 5894 if (BasicInstType == SIInstrFlags::VOP2 && 5895 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 5896 skippedVcc = true; 5897 continue; 5898 } else if (BasicInstType == SIInstrFlags::VOPC && 5899 Inst.getNumOperands() == 0) { 5900 skippedVcc = true; 5901 continue; 5902 } 5903 } 5904 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 5905 Op.addRegOrImmWithInputModsOperands(Inst, 2); 5906 } else if (Op.isImm()) { 5907 // Handle optional arguments 5908 OptionalIdx[Op.getImmTy()] = I; 5909 } else { 5910 llvm_unreachable("Invalid operand type"); 5911 } 5912 skippedVcc = false; 5913 } 5914 5915 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 5916 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 5917 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 5918 switch (BasicInstType) { 5919 case SIInstrFlags::VOP1: 5920 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 5921 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 5922 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 5923 } 5924 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 5925 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 5926 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 5927 break; 5928 5929 case SIInstrFlags::VOP2: 5930 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 5931 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 5932 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 5933 } 5934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 5935 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 5936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 5937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 5938 break; 5939 5940 case SIInstrFlags::VOPC: 5941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 5942 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 5943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 5944 break; 5945 5946 default: 5947 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 5948 } 5949 } 5950 5951 // special case v_mac_{f16, f32}: 5952 // it has src2 register operand that is tied to dst operand 5953 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 5954 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 5955 auto it = Inst.begin(); 5956 std::advance( 5957 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 5958 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 5959 } 5960 } 5961 5962 /// Force static initialization. 5963 extern "C" void LLVMInitializeAMDGPUAsmParser() { 5964 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 5965 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 5966 } 5967 5968 #define GET_REGISTER_MATCHER 5969 #define GET_MATCHER_IMPLEMENTATION 5970 #define GET_MNEMONIC_SPELL_CHECKER 5971 #include "AMDGPUGenAsmMatcher.inc" 5972 5973 // This fuction should be defined after auto-generated include so that we have 5974 // MatchClassKind enum defined 5975 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 5976 unsigned Kind) { 5977 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 5978 // But MatchInstructionImpl() expects to meet token and fails to validate 5979 // operand. This method checks if we are given immediate operand but expect to 5980 // get corresponding token. 5981 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 5982 switch (Kind) { 5983 case MCK_addr64: 5984 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 5985 case MCK_gds: 5986 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 5987 case MCK_lds: 5988 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 5989 case MCK_glc: 5990 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 5991 case MCK_idxen: 5992 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 5993 case MCK_offen: 5994 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 5995 case MCK_SSrcB32: 5996 // When operands have expression values, they will return true for isToken, 5997 // because it is not possible to distinguish between a token and an 5998 // expression at parse time. MatchInstructionImpl() will always try to 5999 // match an operand as a token, when isToken returns true, and when the 6000 // name of the expression is not a valid token, the match will fail, 6001 // so we need to handle it here. 6002 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6003 case MCK_SSrcF32: 6004 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6005 case MCK_SoppBrTarget: 6006 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6007 case MCK_VReg32OrOff: 6008 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6009 case MCK_InterpSlot: 6010 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 6011 case MCK_Attr: 6012 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6013 case MCK_AttrChan: 6014 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6015 default: 6016 return Match_InvalidOperand; 6017 } 6018 } 6019 6020 //===----------------------------------------------------------------------===// 6021 // endpgm 6022 //===----------------------------------------------------------------------===// 6023 6024 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6025 SMLoc S = Parser.getTok().getLoc(); 6026 int64_t Imm = 0; 6027 6028 if (!parseExpr(Imm)) { 6029 // The operand is optional, if not present default to 0 6030 Imm = 0; 6031 } 6032 6033 if (!isUInt<16>(Imm)) { 6034 Error(S, "expected a 16-bit value"); 6035 return MatchOperand_ParseFail; 6036 } 6037 6038 Operands.push_back( 6039 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6040 return MatchOperand_Success; 6041 } 6042 6043 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6044